]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
Add HDR information to formats
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
b5ae35ee 12import functools
8222d8de 13import io
b82f815f 14import itertools
8694c600 15import json
62fec3b2 16import locale
083c9df9 17import operator
8222d8de 18import os
dca08720 19import platform
8222d8de
JMF
20import re
21import shutil
dca08720 22import subprocess
8222d8de 23import sys
21cd8fae 24import tempfile
8222d8de 25import time
67134eab 26import tokenize
8222d8de 27import traceback
75822ca7 28import random
524e2e4f 29import unicodedata
8222d8de 30
961ea474
S
31from string import ascii_letters
32
8c25f81b 33from .compat import (
82d8a8b6 34 compat_basestring,
003c69a8 35 compat_get_terminal_size,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
edf65256 39 compat_pycrypto_AES,
7d1eb38a 40 compat_shlex_quote,
ce02ed60 41 compat_str,
67134eab 42 compat_tokenize_tokenize,
ce02ed60
PH
43 compat_urllib_error,
44 compat_urllib_request,
8b172c2e 45 compat_urllib_request_DataHandler,
819e0531 46 windows_enable_vt_mode,
8c25f81b 47)
982ee69a 48from .cookies import load_cookies
8c25f81b 49from .utils import (
eedb7ba5
S
50 age_restricted,
51 args_to_str,
ce02ed60
PH
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
acd69589 55 DEFAULT_OUTTMPL,
ce02ed60 56 determine_ext,
b5559424 57 determine_protocol,
732044af 58 DOT_DESKTOP_LINK_TEMPLATE,
59 DOT_URL_LINK_TEMPLATE,
60 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 61 DownloadError,
c0384f22 62 encode_compat_str,
ce02ed60 63 encodeFilename,
498f5606 64 EntryNotInPlaylist,
a06916d9 65 error_to_compat_str,
8b0d7497 66 ExistingVideoReached,
590bc6f6 67 expand_path,
ce02ed60 68 ExtractorError,
e29663c6 69 float_or_none,
02dbf93f 70 format_bytes,
76d321f6 71 format_field,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
b0249bca 74 HEADRequest,
c9969434 75 int_or_none,
732044af 76 iri_to_uri,
773f291d 77 ISO3166Utils,
56a8fb4f 78 LazyList,
ce02ed60 79 locked_file,
0202b52a 80 make_dir,
dca08720 81 make_HTTPS_handler,
ce02ed60 82 MaxDownloadsReached,
3158150c 83 network_exceptions,
cd6fc19e 84 orderedSet,
a06916d9 85 OUTTMPL_TYPES,
b7ab0590 86 PagedList,
083c9df9 87 parse_filesize,
91410c9b 88 PerRequestProxyHandler,
dca08720 89 platform_name,
eedb7ba5 90 PostProcessingError,
ce02ed60 91 preferredencoding,
eedb7ba5 92 prepend_extension,
a06916d9 93 process_communicate_or_kill,
51fb4995 94 register_socks_protocols,
a06916d9 95 RejectedVideoReached,
cfb56d1a 96 render_table,
eedb7ba5 97 replace_extension,
ce02ed60
PH
98 SameFileError,
99 sanitize_filename,
1bb5c511 100 sanitize_path,
dcf77cf1 101 sanitize_url,
67dda517 102 sanitized_Request,
e5660ee6 103 std_headers,
819e0531 104 STR_FORMAT_RE_TMPL,
105 STR_FORMAT_TYPES,
1211bb6d 106 str_or_none,
e29663c6 107 strftime_or_none,
ce02ed60 108 subtitles_filename,
819e0531 109 supports_terminal_sequences,
110 TERMINAL_SEQUENCES,
51d9739f 111 ThrottledDownload,
732044af 112 to_high_limit_path,
324ad820 113 traverse_obj,
6033d980 114 try_get,
ce02ed60 115 UnavailableVideoError,
29eb5174 116 url_basename,
7d1eb38a 117 variadic,
58b1f00d 118 version_tuple,
ce02ed60
PH
119 write_json_file,
120 write_string,
6a3f4c3f 121 YoutubeDLCookieProcessor,
dca08720 122 YoutubeDLHandler,
fca6dba8 123 YoutubeDLRedirectHandler,
ce02ed60 124)
a0e07d31 125from .cache import Cache
52a8a1e1 126from .extractor import (
127 gen_extractor_classes,
128 get_info_extractor,
129 _LAZY_LOADER,
3ae5e797 130 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 131)
4c54b89e 132from .extractor.openload import PhantomJSwrapper
52a8a1e1 133from .downloader import (
dbf5416a 134 FFmpegFD,
52a8a1e1 135 get_suitable_downloader,
136 shorten_protocol_name
137)
4c83c967 138from .downloader.rtmp import rtmpdump_version
4f026faf 139from .postprocessor import (
e36d50c5 140 get_postprocessor,
4e3b637d 141 EmbedThumbnailPP,
e36d50c5 142 FFmpegFixupDurationPP,
f17f8651 143 FFmpegFixupM3u8PP,
62cd676c 144 FFmpegFixupM4aPP,
6271f1ca 145 FFmpegFixupStretchedPP,
e36d50c5 146 FFmpegFixupTimestampPP,
4f026faf
PH
147 FFmpegMergerPP,
148 FFmpegPostProcessor,
0202b52a 149 MoveFilesAfterDownloadPP,
3ae5e797 150 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 151)
4c88ff87 152from .update import detect_variant
dca08720 153from .version import __version__
8222d8de 154
e9c0cdd3
YCH
155if compat_os_name == 'nt':
156 import ctypes
157
2459b6e1 158
8222d8de
JMF
159class YoutubeDL(object):
160 """YoutubeDL class.
161
162 YoutubeDL objects are the ones responsible for downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among some other tasks. In most cases there should be one per
165 program. As, given a video URL, the downloader doesn't know how to
166 extract all the needed information, task that InfoExtractors do, it
167 has to pass the URL to one of them.
168
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171 a URL, the YoutubeDL object handles it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174 YoutubeDL process the extracted information, possibly using a File
175 Downloader to download the video.
176
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181 registers itself as the downloader in charge for the InfoExtractors
182 that are added to it, so this is a "mutual registration".
183
184 Available options:
185
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
180940e0 188 videopassword: Password for accessing a video.
1da50aa3
S
189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
8222d8de
JMF
192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
ad8915b7 195 no_warnings: Do not print out anything for warnings.
53c18592 196 forceprint: A list of templates to force print
197 forceurl: Force printing final URL. (Deprecated)
198 forcetitle: Force printing title. (Deprecated)
199 forceid: Force printing ID. (Deprecated)
200 forcethumbnail: Force printing thumbnail URL. (Deprecated)
201 forcedescription: Force printing description. (Deprecated)
202 forcefilename: Force printing final filename. (Deprecated)
203 forceduration: Force printing duration. (Deprecated)
8694c600 204 forcejson: Force printing info_dict as JSON.
63e0be34
PH
205 dump_single_json: Force printing the info_dict of the whole playlist
206 (or video) as a single JSON line.
c25228e5 207 force_write_download_archive: Force writing download archive regardless
208 of 'skip_download' or 'simulate'.
b7b04c78 209 simulate: Do not download the video files. If unset (or None),
210 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 211 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 213 ignore_no_formats_error: Ignore "No video formats" error. Useful for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
c25228e5 216 format_sort: How to sort the video formats. see "Sorting Formats"
217 for more details.
218 format_sort_force: Force the given format_sort. see "Sorting Formats"
219 for more details.
220 allow_multiple_video_streams: Allow multiple video streams to be merged
221 into a single file
222 allow_multiple_audio_streams: Allow multiple audio streams to be merged
223 into a single file
0ba692ac 224 check_formats: Whether to test if the formats are downloadable.
225 Can be True (check all), False (check none)
226 or None (check only if requested by extractor)
4524baf0 227 paths: Dictionary of output paths. The allowed keys are 'home'
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 229 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 231 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
4524baf0 235 windowsfilenames: Force the filenames to be windows compatible
b1940459 236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
26e2805c 239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
d22dec74 241 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
34488702 245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
8222d8de
JMF
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
c14e88f0 249 playlist_items: Specific indices of playlist to download.
ff815fe6 250 playlistreverse: Download playlist items in reverse order.
75822ca7 251 playlistrandom: Download playlist items in random order.
8222d8de
JMF
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
8bf9319e 254 logger: Log messages to a logging.Logger instance.
8222d8de 255 logtostderr: Log messages to stderr instead of stdout.
819e0531 256 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video description to a .info.json file
75d43ca0 259 clean_infojson: Remove private fields from the infojson
34488702 260 getcomments: Extract video comments. This will not be written to disk
06167fbb 261 unless writeinfojson is also given
1fb07d10 262 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 263 writethumbnail: Write the thumbnail image to a file
c25228e5 264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
ec82d85a 266 write_all_thumbnails: Write all thumbnail formats to files
732044af 267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 272 writesubtitles: Write the video subtitles to a file
741dd8ea 273 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 274 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 275 Downloads all the subtitles of the video
0b7f3118 276 (requires writesubtitles or writeautomaticsub)
8222d8de 277 listsubtitles: Lists all available subtitles for the video
a504ced0 278 subtitlesformat: The format code for subtitles
c32b0aab 279 subtitleslangs: List of languages of the subtitles to download (can be regex).
280 The list may contain "all" to refer to all the available
281 subtitles. The language can be prefixed with a "-" to
282 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
283 keepvideo: Keep the video file after post-processing
284 daterange: A DateRange object, download only if the upload_date is in the range.
285 skip_download: Skip the actual download of the video file
c35f9e72 286 cachedir: Location of the cache files in the filesystem.
a0e07d31 287 False to disable filesystem cache.
47192f92 288 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
289 age_limit: An integer representing the user's age in years.
290 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
291 min_views: An integer representing the minimum view count the video
292 must have in order to not be skipped.
293 Videos without view count information are always
294 downloaded. None for no limit.
295 max_views: An integer representing the maximum view count.
296 Videos that are more popular than that are not
297 downloaded.
298 Videos without view count information are always
299 downloaded. None for no limit.
300 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
301 Videos already present in the file are not downloaded
302 again.
8a51f564 303 break_on_existing: Stop the download process after attempting to download a
304 file that is in the archive.
305 break_on_reject: Stop the download process when encountering a video that
306 has been filtered out.
307 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
308 cookiesfrombrowser: A tuple containing the name of the browser and the profile
309 name/path from where cookies are loaded.
310 Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 311 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
312 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
313 At the moment, this is only supported by YouTube.
a1ee09e8 314 proxy: URL of the proxy server to use
38cce791 315 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 316 on geo-restricted sites.
e344693b 317 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
318 bidi_workaround: Work around buggy terminals without bidirectional text
319 support, using fribidi
a0ddb8a2 320 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 321 include_ads: Download ads as well
04b4d394
PH
322 default_search: Prepend this string if an input url is not valid.
323 'auto' for elaborate guessing
62fec3b2 324 encoding: Use this encoding instead of the system-specified.
e8ee972c 325 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
326 Pass in 'in_playlist' to only show this behavior for
327 playlist items.
4f026faf 328 postprocessors: A list of dictionaries, each with an entry
71b640cc 329 * key: The name of the postprocessor. See
7a5c1cfe 330 yt_dlp/postprocessor/__init__.py for a list.
56d868db 331 * when: When to run the postprocessor. Can be one of
332 pre_process|before_dl|post_process|after_move.
333 Assumed to be 'post_process' if not given
b5ae35ee 334 post_hooks: Deprecated - Register a custom postprocessor instead
335 A list of functions that get called as the final step
ab8e5e51
AM
336 for each video file, after all postprocessors have been
337 called. The filename will be passed as the only argument.
71b640cc
PH
338 progress_hooks: A list of functions that get called on download
339 progress, with a dictionary with the entries
5cda4eda 340 * status: One of "downloading", "error", or "finished".
ee69b99a 341 Check this first and ignore unknown values.
3ba7740d 342 * info_dict: The extracted info_dict
71b640cc 343
5cda4eda 344 If status is one of "downloading", or "finished", the
ee69b99a
PH
345 following properties may also be present:
346 * filename: The final filename (always present)
5cda4eda 347 * tmpfilename: The filename we're currently writing to
71b640cc
PH
348 * downloaded_bytes: Bytes on disk
349 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
350 * total_bytes_estimate: Guess of the eventual file size,
351 None if unavailable.
352 * elapsed: The number of seconds since download started.
71b640cc
PH
353 * eta: The estimated time in seconds, None if unknown
354 * speed: The download speed in bytes/second, None if
355 unknown
5cda4eda
PH
356 * fragment_index: The counter of the currently
357 downloaded video fragment.
358 * fragment_count: The number of fragments (= individual
359 files that will be merged)
71b640cc
PH
360
361 Progress hooks are guaranteed to be called at least once
362 (with status "finished") if the download is successful.
819e0531 363 postprocessor_hooks: A list of functions that get called on postprocessing
364 progress, with a dictionary with the entries
365 * status: One of "started", "processing", or "finished".
366 Check this first and ignore unknown values.
367 * postprocessor: Name of the postprocessor
368 * info_dict: The extracted info_dict
369
370 Progress hooks are guaranteed to be called at least twice
371 (with status "started" and "finished") if the processing is successful.
45598f15 372 merge_output_format: Extension to use when merging formats.
6b591b29 373 final_ext: Expected final extension; used to detect when the file was
374 already downloaded and converted. "merge_output_format" is
375 replaced by this extension when given
6271f1ca
PH
376 fixup: Automatically correct known faults of the file.
377 One of:
378 - "never": do nothing
379 - "warn": only emit a warning
380 - "detect_or_warn": check whether we can do anything
62cd676c 381 about it, warn otherwise (default)
504f20dd 382 source_address: Client-side IP address to bind to.
6ec6cb4e 383 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 384 yt-dlp servers for debugging. (BROKEN)
1cf376f5 385 sleep_interval_requests: Number of seconds to sleep between requests
386 during extraction
7aa589a5
S
387 sleep_interval: Number of seconds to sleep before each download when
388 used alone or a lower bound of a range for randomized
389 sleep before each download (minimum possible number
390 of seconds to sleep) when used along with
391 max_sleep_interval.
392 max_sleep_interval:Upper bound of a range for randomized sleep before each
393 download (maximum possible number of seconds to sleep).
394 Must only be used along with sleep_interval.
395 Actual sleep time will be a random float from range
396 [sleep_interval; max_sleep_interval].
1cf376f5 397 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
398 listformats: Print an overview of available video formats and exit.
399 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
400 match_filter: A function that gets called with the info_dict of
401 every video.
402 If it returns a message, the video is ignored.
403 If it returns None, the video is downloaded.
404 match_filter_func in utils.py is one example for this.
7e5db8c9 405 no_color: Do not emit color codes in output.
0a840f58 406 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 407 HTTP header
0a840f58 408 geo_bypass_country:
773f291d
S
409 Two-letter ISO 3166-2 country code that will be used for
410 explicit geographic restriction bypassing via faking
504f20dd 411 X-Forwarded-For HTTP header
5f95927a
S
412 geo_bypass_ip_block:
413 IP range in CIDR notation that will be used similarly to
504f20dd 414 geo_bypass_country
71b640cc 415
85729c51 416 The following options determine which downloader is picked:
52a8a1e1 417 external_downloader: A dictionary of protocol keys and the executable of the
418 external downloader to use for it. The allowed protocols
419 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
420 Set the value to 'native' to use the native downloader
421 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
422 or {'m3u8': 'ffmpeg'} instead.
423 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
424 if True, otherwise use ffmpeg/avconv if False, otherwise
425 use downloader suggested by extractor if None.
53ed7066 426 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 427 The following options do not work when used through the API:
b5ae35ee 428 filename, abort-on-error, multistreams, no-live-chat, format-sort
b51d2ae3 429 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 430 Refer __init__.py for their implementation
819e0531 431 progress_template: Dictionary of templates for progress outputs.
432 Allowed keys are 'download', 'postprocess',
433 'download-title' (console title) and 'postprocess-title'.
434 The template is mapped on a dictionary with keys 'progress' and 'info'
fe7e0c98 435
8222d8de 436 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 437 the downloader (see yt_dlp/downloader/common.py):
51d9739f 438 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
b5ae35ee 439 max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
440 noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
441 external_downloader_args.
76b1bd67
JMF
442
443 The following options are used by the post processors:
d4a24f40 444 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 445 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
446 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
447 to the binary or its containing directory.
43820c03 448 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 449 and a list of additional command-line arguments for the
450 postprocessor/executable. The dict can also have "PP+EXE" keys
451 which are used when the given exe is used by the given PP.
452 Use 'default' as the name for arguments to be passed to all PP
453 For compatibility with youtube-dl, a single list of args
454 can also be used
e409895f 455
456 The following options are used by the extractors:
62bff2c1 457 extractor_retries: Number of times to retry for known errors
458 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 459 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 460 discontinuities such as ad breaks (default: False)
5d3a0e79 461 extractor_args: A dictionary of arguments to be passed to the extractors.
462 See "EXTRACTOR ARGUMENTS" for details.
463 Eg: {'youtube': {'skip': ['dash', 'hls']}}
464 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
465 If True (default), DASH manifests and related
62bff2c1 466 data will be downloaded and processed by extractor.
467 You can reduce network I/O by disabling it if you don't
468 care about DASH. (only for youtube)
5d3a0e79 469 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
470 If True (default), HLS manifests and related
62bff2c1 471 data will be downloaded and processed by extractor.
472 You can reduce network I/O by disabling it if you don't
473 care about HLS. (only for youtube)
8222d8de
JMF
474 """
475
c9969434
S
476 _NUMERIC_FIELDS = set((
477 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 478 'timestamp', 'release_timestamp',
c9969434
S
479 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
480 'average_rating', 'comment_count', 'age_limit',
481 'start_time', 'end_time',
482 'chapter_number', 'season_number', 'episode_number',
483 'track_number', 'disc_number', 'release_year',
c9969434
S
484 ))
485
48ee10ee 486 _format_selection_exts = {
487 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
488 'video': {'mp4', 'flv', 'webm', '3gp'},
489 'storyboards': {'mhtml'},
490 }
491
8222d8de 492 params = None
8b7491c8 493 _ies = {}
56d868db 494 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 495 _printed_messages = set()
1cf376f5 496 _first_webpage_request = True
8222d8de
JMF
497 _download_retcode = None
498 _num_downloads = None
30a074c2 499 _playlist_level = 0
500 _playlist_urls = set()
8222d8de
JMF
501 _screen_file = None
502
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params     Dictionary of options (see the class docstring). An empty
                      dictionary is used when None. The dictionary is stored as
                      self.params and modified in place (eg: 'no_color',
                      'overwrites'/'nooverwrites', 'restrictfilenames').
    @param auto_init  Whether to load the default extractors and print header (if verbose).
                      Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self._ies = {}
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Give every instance its own playlist bookkeeping, like the other mutable
    # attributes above, instead of sharing the class-level set between instances
    self._playlist_level = 0
    self._playlist_urls = set()
    # Truthiness test rather than list indexing, so that an explicit
    # logtostderr=None behaves like False instead of raising TypeError
    self._screen_file = sys.stderr if params.get('logtostderr') else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    # FIXME: This will break if we ever print color to stdout
    self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._color_text("DO NOT", "red")} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated `param` is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # The 'postprocessor_hooks' option is documented on the class but was never
    # read here. Register the hooks before the postprocessors are created.
    # NOTE(review): this assumes postprocessors pick up the downloader's hooks
    # when they are attached via set_downloader - confirm in postprocessor/common.py
    for ph in self.params.get('postprocessor_hooks', []):
        self.add_postprocessor_hook(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's definition is not modified by pop()
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))
650
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short video ID starting with a dash.

    Any element of argv made of a dash followed by exactly 10 characters from
    [0-9A-Za-z_-] is considered suspicious. If there is at least one, a warning
    is reported showing the same command line with those arguments moved
    behind a "--" separator. Nothing happens otherwise.
    """
    # short YouTube ID starting with dash?
    looks_like_id = re.compile(r'^-[0-9A-Za-z_-]{10}$').match

    suspects, regular = [], []
    for arg in argv:
        (suspects if looks_like_id(arg) else regular).append(arg)

    if not suspects:
        return

    suggested_argv = ['yt-dlp'] + regular + ['--'] + suspects
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggested_argv))
666
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key().

    The extractor is always stored in self._ies. When `ie` is an instance
    rather than a class, it is also stored in self._ies_instances and this
    object is set as its downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # Only a class was given; there is no instance to set up yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
8222d8de 674
8b7491c8 675 def _get_info_extractor_class(self, ie_key):
676 ie = self._ies.get(ie_key)
677 if ie is None:
678 ie = get_info_extractor(ie_key)
679 self.add_info_extractor(ie)
680 return ie
681
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Get an instance of the IE named ie_key. An instance already present in
    self._ies_instances is reused; otherwise a new one is created from the
    class returned by the module-level get_info_extractor() and registered
    through add_info_extractor().
    """
    existing = self._ies_instances.get(ie_key)
    if existing is not None:
        return existing
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
693
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractor_classes to the end of the list

    Each item yielded by gen_extractor_classes() is passed unchanged to
    add_info_extractor().
    """
    for ie in gen_extractor_classes():
        self.add_info_extractor(ie)
700
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain stored under `when` and bind it to this downloader."""
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
705
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
709
def add_progress_hook(self, ph):
    """Register ph at the end of the list of download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 713
def add_postprocessor_hook(self, ph):
    """Register ph at the end of the list of postprocessing progress hooks"""
    registered = self._postprocessor_hooks
    registered.append(ph)
717
1c088fa8 718 def _bidi_workaround(self, message):
5d681e96 719 if not hasattr(self, '_output_channel'):
1c088fa8
PH
720 return message
721
5d681e96 722 assert hasattr(self, '_output_process')
11b85ce6 723 assert isinstance(message, compat_str)
6febd1c1
PH
724 line_count = message.count('\n') + 1
725 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 726 self._output_process.stdin.flush()
6febd1c1 727 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 728 for _ in range(line_count))
6febd1c1 729 return res[:-len('\n')]
1c088fa8 730
b35496d8 731 def _write_string(self, message, out=None, only_once=False):
732 if only_once:
733 if message in self._printed_messages:
734 return
735 self._printed_messages.add(message)
736 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 737
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    A configured 'logger' receives the message via debug() instead.
    Quiet messages are only written when 'verbose' is set, and then go to
    the error stream rather than the screen stream.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    stream = self._err_file if quiet else self._screen_file
    self._write_string(text, stream)
8222d8de 746
def to_stderr(self, message, only_once=False):
    """Print message to stderr

    A configured 'logger' receives the message via error() instead of it
    being written to the error stream.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = '%s\n' % self._bidi_workaround(message)
    self._write_string(text, self._err_file, only_once=only_once)
8222d8de 754
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console/terminal title to message when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 765
bdde425c
PH
def save_console_title(self):
    """Push the terminal title when 'consoletitle' is on and not simulating.

    Only done on non-'nt' systems that have TERM in the environment.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and not self.params.get('simulate'):
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
774
def restore_console_title(self):
    """Pop the terminal title when 'consoletitle' is on and not simulating.

    Only done on non-'nt' systems that have TERM in the environment.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and not self.params.get('simulate'):
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
783
def __enter__(self):
    """Enter the `with` block: save the console title and return this object."""
    self.save_console_title()
    return self
787
788 def __exit__(self, *args):
789 self.restore_console_title()
f89197d7 790
dca08720 791 if self.params.get('cookiefile') is not None:
1bab3437 792 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 793
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    message, if given, is printed to stderr first.
    tb, if given, is additional traceback information; it is only
    printed when the 'verbose' param is set. When tb is not given in
    verbose mode, one is built from the exception currently being
    handled or, failing that, from the current call stack.

    Raises DownloadError unless the 'ignoreerrors' param is set, in
    which case _download_retcode is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                # The active exception may itself carry an `exc_info` attribute;
                # when it does, that traceback is printed first
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                # Not inside an except block: fall back to the current call stack
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if tb:
            self.to_stderr(tb)
    if not self.params.get('ignoreerrors'):
        # Prefer the exc_info carried by the active exception over the active exception itself
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
824
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    quiet_mode = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet_mode)
829
819e0531 830 def _color_text(self, text, color):
831 if self.params.get('no_color'):
832 return text
833 return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
834
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored unless colors are disabled.
    A configured 'logger' receives the bare message via warning() instead;
    otherwise nothing is printed when 'no_warnings' is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._color_text("WARNING:", "yellow")
    self.to_stderr(f'{prefix} {message}', only_once)
8222d8de
JMF
846
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:'
    (colored in red unless colors are disabled).
    '''
    prefix = self._color_text("ERROR:", "red")
    self.trouble(f'{prefix} {message}', tb)
8222d8de 853
def write_debug(self, message, only_once=False):
    '''Log a '[debug] '-prefixed message, or print it to stderr; no-op unless 'verbose' is set'''
    if not self.params.get('verbose', False):
        return
    debug_line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(debug_line)
        return
    self.to_stderr(debug_line, only_once)
0760b0a7 863
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name when printing the name
    raises UnicodeEncodeError.
    """
    named_template = '[download] %s has already been downloaded'
    try:
        self.to_screen(named_template % file_name)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 870
def report_file_delete(self, file_name):
    """Report that existing file will be deleted.

    Falls back to a message without the file name when printing the name
    raises UnicodeEncodeError.
    """
    named_template = 'Deleting existing file %s'
    try:
        self.to_screen(named_template % file_name)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
0c3d0f51 877
def raise_no_formats(self, info, forced=False):
    """Raise ExtractorError or warn about a video that has no formats.

    Only a warning is issued when the 'ignore_no_formats_error' param is
    set and `forced` is false; in every other case ExtractorError is raised.
    The message mentions DRM when info['__has_drm'] is truthy.
    """
    has_drm = info.get('__has_drm')
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)
887
def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict with missing defaults filled in.

    A non-dict value is treated as the 'default' template. Every key of
    DEFAULT_OUTTMPL that is absent (or None) is filled from DEFAULT_OUTTMPL;
    with 'restrictfilenames' set, spaces are removed from those defaults.
    A warning is issued for each template that is a bytes object.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    restricted = self.params.get('restrictfilenames')
    for name, fallback in DEFAULT_OUTTMPL.items():
        if templates.get(name) is not None:
            continue
        if restricted:
            # Remove spaces in the default template
            fallback = fallback.replace(' - ', ' ').replace(' ', '-')
        templates[name] = fallback

    for template in templates.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
906
def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for dir_type and filename.

    The path components come from the 'paths' param; the joined result is
    passed through sanitize_path before being returned.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    running_py2_on_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if running_py2_on_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
921
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path over outtmpl while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$', which is not
    # what we want: '%%' has to survive until the template dict
    # substitution step. A random 32-letter marker is therefore wedged
    # between the doubled characters and stripped again afterwards.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')
936
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def _escape(match):
        # Matches without a key get an extra '%' in front; keyed ones are left alone
        lead = '' if match.group('has_key') else '%'
        return lead + match.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
944
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def _as_plain_string(match):
        # Swap the custom conversion letter for a plain 's'
        return f'{match.group(0)[:-1]}s'

    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]')
    outtmpl = re.sub(custom_types_re, _as_plain_string, cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution: every key resolves to 0
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None
957
03b4de72 958 @staticmethod
959 def _copy_infodict(info_dict):
960 info_dict = dict(info_dict)
961 for key in ('__original_infodict', '__postprocessors'):
962 info_dict.pop(key, None)
963 return info_dict
964
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Returns a tuple (template, TMPL_DICT): the template has every matched
    field rewritten to reference a generated key, and TMPL_DICT maps each
    generated key to the value computed for it.

    sanitize, if given, is called as sanitize(field_name, value) on the
    values of string-like conversions.

    Note that 'epoch' is set on the info_dict passed in (if missing); all
    other additions are made on a copy.
    """
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key(): generated key -> computed value
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # NOTE(review): the '.' in the numeric alternative is unescaped, so it matches
    # any character; also the 'field|num' alternation is not wrapped in a group
    # where it is embedded into INTERNAL_FORMAT_RE below - confirm both are intended
    MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Grammar of a template key: [-]fields[maths][>strf_format][,alternate][|default]
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|)]+)?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # Split a dotted path into keys; a leading '.' yields an empty first key, which is dropped
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # mdict is the groupdict() of an INTERNAL_FORMAT_RE match
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Consume the maths string piece by piece, alternating between
            # an operator and its operand
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a literal number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            # '\,' in the format stands for a literal comma
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    # Placeholder used when neither a value nor an explicit default is available
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists are serialized as lists
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    def create_key(outer_mobj):
        # Replacement callback for EXTERNAL_FORMAT_RE.sub()
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        # Last component of the first field; this is what `sanitize` receives as the field name
        initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
        value, default = None, na
        # Walk the chain of comma separated alternates until one gives a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        # Same flags/width as requested, but with a plain 's' conversion
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
            value, fmt = delim.join(variadic(value)), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value, fmt = compat_shlex_quote(str(value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            # Apply the format on the UTF-8 encoded value, then decode ignoring errors
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            opts = outer_mobj.group('conversion') or ''
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                value), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitize(initial_field, value)

        # Build the generated key from the original key and format, joined by NUL characters
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1117
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Prepare outtmpl against info_dict and return the substituted string.

    Extra arguments are forwarded to prepare_outtmpl().
    """
    template, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(template)
    return escaped % values
1121
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type tmpl_type against info_dict.

    Returns the resulting filename, or None after reporting an error when
    a ValueError is raised while building it.
    """
    try:
        # Field values are sanitized individually; 'id' and '*_id' fields are flagged as IDs
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Fall back to the 'default' template when there is none for tmpl_type
        outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
        filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if filename and force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the first dot-separated component is trimmed; the last one (and the
            # one before it, if there are more than two) are kept as extensions.
            # NOTE(review): any components in between are dropped - confirm this is intended
            fn_groups = filename.rsplit('.')
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1149
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    info_dict -- the info dict the output template is evaluated against
    dir_type  -- template/path type; '' selects the 'default' template
    warn      -- whether to warn when the 'paths' param cannot be honoured

    Returns '' when no filename could be generated for a dir_type other
    than '' or 'temp'. A filename of '-' (stdout), an empty filename or
    None is returned unchanged. Anything else is returned after being
    combined with the configured paths by get_output_path().
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    # _prepare_filename may return None (template error that was ignored);
    # os.path.isabs() must not be called on it, so only warn for real names
    if warn and filename and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
0202b52a 1168
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns a string with the reason for skipping it. Unless
    `silent` is set, the reason is also printed to the screen.

    Raises ExistingVideoReached when the entry is in the download archive
    and 'break_on_existing' is set, and RejectedVideoReached when a filter
    rejects the entry and 'break_on_reject' is set.
    """

    # Name used in the messages: title, else id, else the literal 'video'
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Returns the reason for rejecting the entry, or None if all filters pass
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # User supplied filter callable; whatever it returns (if not None) is the reason
        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                # (retry without the `incomplete` keyword; skipped entirely for incomplete entries)
                ret = None if incomplete else match_filter(info_dict)
            if ret is not None:
                return ret
        return None

    # The archive check takes precedence; filters are only consulted for entries not in the archive
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
fe7e0c98 1225
b6c45014
JMF
1226 @staticmethod
1227 def add_extra_info(info_dict, extra_info):
1228 '''Set the keys from extra_info in info dict if they are missing'''
1229 for key, value in extra_info.items():
1230 info_dict.setdefault(key, value)
1231
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for url with the first suitable extractor.

    Returns whatever the extraction of that extractor's result yields.
    None is returned when the temporary ID of the URL is already in the
    download archive, or when no extractor is suitable (an error is
    reported in that case).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if extra_info is None:
        extra_info = {}

    # An explicit ie_key takes precedence over force_generic_extractor
    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = {ie_key: self._get_info_extractor_class(ie_key)}
    else:
        ies = self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            # `break` skips the for-else below, so no error is reported
            break
        # Only the first suitable extractor is ever used
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Reached only when the loop finished without finding a suitable extractor
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1276
def __handle_extraction_exceptions(func):
    # Decorator for methods performing extraction: exceptions raised by `func`
    # are turned into reported errors where appropriate. When an error is
    # reported without report_error raising, the wrapper returns None.
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            # Extend the message with the countries where it is available (if known) and a hint
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except ThrottledDownload:
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            # Retry the whole call through the wrapper, so the retry gets the same handling
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
            # These are always propagated to the caller, even with 'ignoreerrors'
            raise
        except Exception as e:
            if self.params.get('ignoreerrors'):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
1303
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on url and optionally process what it returns.

    Returns None when the extractor returns None, the processed result
    when `process` is set, and the raw extractor result otherwise.
    """
    result = ie.extract(url)
    if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(result, list):
        # Backwards compatibility: old IE result format
        result = dict(_type='compat_list', entries=result)
    original_url = extra_info.get('original_url')
    if original_url:
        result.setdefault('original_url', original_url)
    self.add_default_extra_info(result, ie, url)
    if not process:
        return result
    return self.process_ie_result(result, download, extra_info)
fe7e0c98 1322
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the URL and extractor related fields missing from ie_result.

    Nothing is added for url (respectively ie) when it is None.
    """
    if url is not None:
        url_fields = dict(
            webpage_url=url,
            original_url=url,
            webpage_url_basename=url_basename(url))
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = dict(
            extractor=ie.IE_NAME,
            extractor_key=ie.ie_key())
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1335
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result  -- the info dict returned by an extractor; its '_type'
                  (default 'video') selects how it is handled
    download   -- passed on to the video/playlist processing
    extra_info -- dict of extra values to add to the result(s)

    None is returned for a playlist that is already being processed.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: the reference is not resolved; only the
            # forced printings/archive entry are produced from a copy
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key, not extra_info
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Everything set on the outer result overrides the inner one,
        # except for the fields identifying the result/extractor itself
        excluded = ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key')
        force_properties = {
            k: v for k, v in ie_result.items()
            if v is not None and k not in excluded}
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            # Forget the seen playlist URLs once the outermost playlist is done
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Entries of a compat_list inherit the extractor/URL fields of the list
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1462
def _ensure_dir_exists(self, path):
    """Hand ``path`` to ``make_dir`` together with ``self.report_error``.

    The result of ``make_dir`` is returned unchanged.
    NOTE(review): ``make_dir`` is defined outside this block; presumably it
    creates the parent directory of ``path`` and reports failures through the
    given callback - confirm against its definition.
    """
    outcome = make_dir(path, self.report_error)
    return outcome
1465
def __process_playlist(self, ie_result, download):
    """Select the requested entries of a playlist result and process each one.

    The method
      1. rebuilds a sparse entry list when ``ie_result['requested_entries']``
         is set (the result only contains the previously requested entries),
      2. picks entries according to the ``playliststart``, ``playlistend`` and
         ``playlist_items`` params,
      3. optionally writes the playlist-level info json, description and
         thumbnails,
      4. applies ``playlistreverse`` / ``playlistrandom`` and
      5. passes every remaining entry to ``__process_iterable_entry`` with the
         playlist metadata as extra info.

    @param ie_result  playlist result dict; must contain 'entries',
                      'extractor', 'extractor_key' and 'webpage_url'
    @param download   forwarded unchanged to the per-entry processing
    @return           ``ie_result`` with 'entries' replaced by the processed
                      results, or None when writing the playlist info json or
                      description reported failure (returned None)
    @raise EntryNotInPlaylist  if 'entries' is missing, or if a requested
                               entry is absent from an incomplete entry list
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Place each entry at its 1-based playlist index, padding with None.
            # max(indexes) rather than max(*indexes): unpacking a one-element
            # list would call max() on a bare int and raise TypeError.
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # 'a,b-c' -> a, b, b+1, ..., c (both range ends inclusive)
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    yield from range(int(start), int(end) + 1)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    # For a list the total is known now; the remaining %d is filled in later with n_entries
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

    if isinstance(ie_entries, list):
        def get_entry(i):
            return ie_entries[i - 1]
    else:
        if not isinstance(ie_entries, PagedList):
            ie_entries = LazyList(ie_entries)

        def get_entry(i):
            # Indexing a non-list container may trigger extraction, so it is
            # routed through the extraction-exception handler
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries = []
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        # A missing entry is kept as None so that positions stay aligned with `items`
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self._write_info_json('playlist', ie_result,
                                 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    # A falsy setting (None or 0) means "never give up"
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            # Compat mode: index follows the position after re-ordering
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # A non-None result from _match_entry means the entry is to be skipped
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1626
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run ``process_ie_result`` on one playlist entry.

    The call is wrapped by ``__handle_extraction_exceptions`` (defined
    outside this block), which decides what happens when processing raises.
    """
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1631
67134eab
JMF
1632 def _build_format_filter(self, filter_spec):
1633 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1634
1635 OPERATORS = {
1636 '<': operator.lt,
1637 '<=': operator.le,
1638 '>': operator.gt,
1639 '>=': operator.ge,
1640 '=': operator.eq,
1641 '!=': operator.ne,
1642 }
67134eab 1643 operator_rex = re.compile(r'''(?x)\s*
187986a8 1644 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1645 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1646 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1647 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1648 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1649 if m:
1650 try:
1651 comparison_value = int(m.group('value'))
1652 except ValueError:
1653 comparison_value = parse_filesize(m.group('value'))
1654 if comparison_value is None:
1655 comparison_value = parse_filesize(m.group('value') + 'B')
1656 if comparison_value is None:
1657 raise ValueError(
1658 'Invalid value %r in format specification %r' % (
67134eab 1659 m.group('value'), filter_spec))
9ddb6925
S
1660 op = OPERATORS[m.group('op')]
1661
083c9df9 1662 if not m:
9ddb6925
S
1663 STR_OPERATORS = {
1664 '=': operator.eq,
10d33b34
YCH
1665 '^=': lambda attr, value: attr.startswith(value),
1666 '$=': lambda attr, value: attr.endswith(value),
1667 '*=': lambda attr, value: value in attr,
9ddb6925 1668 }
187986a8 1669 str_operator_rex = re.compile(r'''(?x)\s*
1670 (?P<key>[a-zA-Z0-9._-]+)\s*
1671 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1672 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1673 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1674 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1675 if m:
1676 comparison_value = m.group('value')
2cc779f4
S
1677 str_op = STR_OPERATORS[m.group('op')]
1678 if m.group('negation'):
e118a879 1679 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1680 else:
1681 op = str_op
083c9df9 1682
9ddb6925 1683 if not m:
187986a8 1684 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1685
1686 def _filter(f):
1687 actual_value = f.get(m.group('key'))
1688 if actual_value is None:
1689 return m.group('none_inclusive')
1690 return op(actual_value, comparison_value)
67134eab
JMF
1691 return _filter
1692
0017d9ad 1693 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1694
af0f7428
S
1695 def can_merge():
1696 merger = FFmpegMergerPP(self)
1697 return merger.available and merger.can_merge()
1698
91ebc640 1699 prefer_best = (
b7b04c78 1700 not self.params.get('simulate')
91ebc640 1701 and download
1702 and (
1703 not can_merge()
19807826 1704 or info_dict.get('is_live', False)
de6000d9 1705 or self.outtmpl_dict['default'] == '-'))
53ed7066 1706 compat = (
1707 prefer_best
1708 or self.params.get('allow_multiple_audio_streams', False)
1709 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1710
1711 return (
53ed7066 1712 'best/bestvideo+bestaudio' if prefer_best
1713 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1714 else 'bestvideo+bestaudio/best')
0017d9ad 1715
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    ``format_spec`` is tokenized with the Python tokenizer, parsed into a
    tree of ``FormatSelector`` tuples and then turned into nested closures.
    Supported syntax, as handled by the parser below:
      ``a,b``  select both          ``a/b``  first alternative yielding anything
      ``a+b``  merge                ``(...)`` grouping
      ``[...]`` filter (compiled by ``_build_format_filter``)

    @param format_spec  the selection expression, e.g. 'bestvideo+bestaudio/best'
    @return  function taking a ctx dict (reads 'formats' and
             'incomplete_formats') and returning an iterable of chosen formats
    @raise SyntaxError  if the expression is malformed
    """
    def syntax_error(note, start):
        # Build (not raise) the error; `start` is a (row, col) token position
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect the raw text up to the closing ']' (returns None if it never comes)
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (this inner loop deliberately consumes from the same iterator)
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; the inside_* flags tell a nested call which
        # operators belong to its caller and must be pushed back
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        # A bare filter applies to 'best'
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being iterated:
            # popping shifts the following element into the current index, so
            # it would be skipped and never checked
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            # If every input lacks both codecs nothing survives; fall back to the
            # last input, which is what a pair of such formats produced before
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        # Values of the given key(s) across all formats, with falsy values dropped
        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))),
            'format_note': '+'.join(orderedSet(filtered('format_note'))),
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # With the 'check_formats' param set, yield only formats for which a
        # test download into a temporary file succeeds; otherwise yield all
        if not check_formats:
            yield from formats
            return
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            temp_file = tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None)
            # Only the name is needed; close the handle right away
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function of ctx
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Each side gets its own deep copy of ctx
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    # Fold from the last format towards the first
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    # Formats with neither audio nor video are never picked by best/worst
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    # 'best' walks the list from the end, 'worst' from the start
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the [..] filters on a copy so the caller's ctx is untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 2059
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Assemble the HTTP headers for ``info_dict``.

    Starts from a copy of ``std_headers``, overlays the entry's own
    'http_headers', sets 'Cookie' from ``_calc_cookies`` when that yields
    something, and adds 'X-Forwarded-For' from '__x_forwarded_for_ip' unless
    the header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2077
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar produces for ``info_dict['url']``.

    A throwaway request for the URL is created, ``self.cookiejar`` adds its
    cookie header to it, and that header's value is read back.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 2082
b0249bca 2083 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2084 thumbnails = info_dict.get('thumbnails')
2085 if thumbnails is None:
2086 thumbnail = info_dict.get('thumbnail')
2087 if thumbnail:
2088 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2089 if thumbnails:
2090 thumbnails.sort(key=lambda t: (
2091 t.get('preference') if t.get('preference') is not None else -1,
2092 t.get('width') if t.get('width') is not None else -1,
2093 t.get('height') if t.get('height') is not None else -1,
2094 t.get('id') if t.get('id') is not None else '',
2095 t.get('url')))
b0249bca 2096
0ba692ac 2097 def thumbnail_tester():
0ba692ac 2098 def test_thumbnail(t):
e820fbaa 2099 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
0ba692ac 2100 try:
2101 self.urlopen(HEADRequest(t['url']))
2102 except network_exceptions as err:
e820fbaa 2103 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
0ba692ac 2104 return False
2105 return True
0ba692ac 2106 return test_thumbnail
b0249bca 2107
bc516a3f 2108 for i, t in enumerate(thumbnails):
bc516a3f 2109 if t.get('id') is None:
2110 t['id'] = '%d' % i
b0249bca 2111 if t.get('width') and t.get('height'):
2112 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2113 t['url'] = sanitize_url(t['url'])
0ba692ac 2114
e820fbaa 2115 if self.params.get('check_formats'):
0ba692ac 2116 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2117 else:
2118 info_dict['thumbnails'] = thumbnails
bc516a3f 2119
dd82ffea
JMF
2120 def process_video_result(self, info_dict, download=True):
2121 assert info_dict.get('_type', 'video') == 'video'
2122
bec1fad2
PH
2123 if 'id' not in info_dict:
2124 raise ExtractorError('Missing "id" field in extractor result')
2125 if 'title' not in info_dict:
1151c407 2126 raise ExtractorError('Missing "title" field in extractor result',
2127 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2128
c9969434
S
2129 def report_force_conversion(field, field_not, conversion):
2130 self.report_warning(
2131 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2132 % (field, field_not, conversion))
2133
2134 def sanitize_string_field(info, string_field):
2135 field = info.get(string_field)
2136 if field is None or isinstance(field, compat_str):
2137 return
2138 report_force_conversion(string_field, 'a string', 'string')
2139 info[string_field] = compat_str(field)
2140
2141 def sanitize_numeric_fields(info):
2142 for numeric_field in self._NUMERIC_FIELDS:
2143 field = info.get(numeric_field)
2144 if field is None or isinstance(field, compat_numeric_types):
2145 continue
2146 report_force_conversion(numeric_field, 'numeric', 'int')
2147 info[numeric_field] = int_or_none(field)
2148
2149 sanitize_string_field(info_dict, 'id')
2150 sanitize_numeric_fields(info_dict)
be6217b2 2151
dd82ffea
JMF
2152 if 'playlist' not in info_dict:
2153 # It isn't part of a playlist
2154 info_dict['playlist'] = None
2155 info_dict['playlist_index'] = None
2156
bc516a3f 2157 self._sanitize_thumbnails(info_dict)
d5519808 2158
536a55da 2159 thumbnail = info_dict.get('thumbnail')
bc516a3f 2160 thumbnails = info_dict.get('thumbnails')
536a55da
S
2161 if thumbnail:
2162 info_dict['thumbnail'] = sanitize_url(thumbnail)
2163 elif thumbnails:
d5519808
PH
2164 info_dict['thumbnail'] = thumbnails[-1]['url']
2165
ae30b840 2166 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2167 info_dict['display_id'] = info_dict['id']
2168
239df021 2169 if info_dict.get('duration') is not None:
2170 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2171
10db0d2f 2172 for ts_key, date_key in (
2173 ('timestamp', 'upload_date'),
2174 ('release_timestamp', 'release_date'),
2175 ):
2176 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2177 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2178 # see http://bugs.python.org/issue1646728)
2179 try:
2180 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2181 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2182 except (ValueError, OverflowError, OSError):
2183 pass
9d2ecdbc 2184
ae30b840 2185 live_keys = ('is_live', 'was_live')
2186 live_status = info_dict.get('live_status')
2187 if live_status is None:
2188 for key in live_keys:
2189 if info_dict.get(key) is False:
2190 continue
2191 if info_dict.get(key):
2192 live_status = key
2193 break
2194 if all(info_dict.get(key) is False for key in live_keys):
2195 live_status = 'not_live'
2196 if live_status:
2197 info_dict['live_status'] = live_status
2198 for key in live_keys:
2199 if info_dict.get(key) is None:
2200 info_dict[key] = (live_status == key)
2201
33d2fc2f
S
2202 # Auto generate title fields corresponding to the *_number fields when missing
2203 # in order to always have clean titles. This is very common for TV series.
2204 for field in ('chapter', 'season', 'episode'):
2205 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2206 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2207
05108a49
S
2208 for cc_kind in ('subtitles', 'automatic_captions'):
2209 cc = info_dict.get(cc_kind)
2210 if cc:
2211 for _, subtitle in cc.items():
2212 for subtitle_format in subtitle:
2213 if subtitle_format.get('url'):
2214 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2215 if subtitle_format.get('ext') is None:
2216 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2217
2218 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2219 subtitles = info_dict.get('subtitles')
4bba3716 2220
360e1ca5 2221 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2222 info_dict['id'], subtitles, automatic_captions)
a504ced0 2223
dd82ffea
JMF
2224 # We now pick which formats have to be downloaded
2225 if info_dict.get('formats') is None:
2226 # There's only one format available
2227 formats = [info_dict]
2228 else:
2229 formats = info_dict['formats']
2230
e0493e90 2231 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2232 if not self.params.get('allow_unplayable_formats'):
2233 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2234
db95dc13 2235 if not formats:
1151c407 2236 self.raise_no_formats(info_dict)
db95dc13 2237
73af5cc8
S
2238 def is_wellformed(f):
2239 url = f.get('url')
a5ac0c47 2240 if not url:
73af5cc8
S
2241 self.report_warning(
2242 '"url" field is missing or empty - skipping format, '
2243 'there is an error in extractor')
a5ac0c47
S
2244 return False
2245 if isinstance(url, bytes):
2246 sanitize_string_field(f, 'url')
2247 return True
73af5cc8
S
2248
2249 # Filter out malformed formats for better extraction robustness
2250 formats = list(filter(is_wellformed, formats))
2251
181c7053
S
2252 formats_dict = {}
2253
dd82ffea 2254 # We check that all the formats have the format and format_id fields
db95dc13 2255 for i, format in enumerate(formats):
c9969434
S
2256 sanitize_string_field(format, 'format_id')
2257 sanitize_numeric_fields(format)
dcf77cf1 2258 format['url'] = sanitize_url(format['url'])
e74e3b63 2259 if not format.get('format_id'):
8016c922 2260 format['format_id'] = compat_str(i)
e2effb08
S
2261 else:
2262 # Sanitize format_id from characters used in format selector expression
ec85ded8 2263 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2264 format_id = format['format_id']
2265 if format_id not in formats_dict:
2266 formats_dict[format_id] = []
2267 formats_dict[format_id].append(format)
2268
2269 # Make sure all formats have unique format_id
03b4de72 2270 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2271 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2272 ambigious_id = len(ambiguous_formats) > 1
2273 for i, format in enumerate(ambiguous_formats):
2274 if ambigious_id:
181c7053 2275 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2276 if format.get('ext') is None:
2277 format['ext'] = determine_ext(format['url']).lower()
2278 # Ensure there is no conflict between id and ext in format selection
2279 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2280 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2281 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2282
2283 for i, format in enumerate(formats):
8c51aa65 2284 if format.get('format') is None:
6febd1c1 2285 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2286 id=format['format_id'],
2287 res=self.format_resolution(format),
b868936c 2288 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2289 )
6f0be937 2290 if format.get('protocol') is None:
b5559424 2291 format['protocol'] = determine_protocol(format)
239df021 2292 if format.get('resolution') is None:
2293 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2294 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2295 format['dynamic_range'] = 'SDR'
e5660ee6
JMF
2296 # Add HTTP headers, so that external programs can use them from the
2297 # json output
2298 full_format_info = info_dict.copy()
2299 full_format_info.update(format)
2300 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2301 # Remove private housekeeping stuff
2302 if '__x_forwarded_for_ip' in info_dict:
2303 del info_dict['__x_forwarded_for_ip']
dd82ffea 2304
4bcc7bd1 2305 # TODO Central sorting goes here
99e206d5 2306
88acdbc2 2307 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2308 # only set the 'formats' fields if the original info_dict list them
2309 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2310 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2311 # which can't be exported to json
b3d9ef88 2312 info_dict['formats'] = formats
4ec82a72 2313
2314 info_dict, _ = self.pre_process(info_dict)
2315
b7b04c78 2316 if self.params.get('list_thumbnails'):
2317 self.list_thumbnails(info_dict)
2318 if self.params.get('listformats'):
86c66b2d 2319 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2320 self.to_screen('%s has no formats' % info_dict['id'])
2321 else:
2322 self.list_formats(info_dict)
b7b04c78 2323 if self.params.get('listsubtitles'):
2324 if 'automatic_captions' in info_dict:
2325 self.list_subtitles(
2326 info_dict['id'], automatic_captions, 'automatic captions')
2327 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2328 list_only = self.params.get('simulate') is None and (
2329 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2330 if list_only:
b7b04c78 2331 # Without this printing, -F --print-json will not work
169dbde9 2332 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2333 return
2334
187986a8 2335 format_selector = self.format_selector
2336 if format_selector is None:
0017d9ad 2337 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2338 self.write_debug('Default format spec: %s' % req_format)
187986a8 2339 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2340
2341 # While in format selection we may need to have an access to the original
2342 # format set in order to calculate some metrics or do some processing.
2343 # For now we need to be able to guess whether original formats provided
2344 # by extractor are incomplete or not (i.e. whether extractor provides only
2345 # video-only or audio-only formats) for proper formats selection for
2346 # extractors with such incomplete formats (see
067aa17e 2347 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2348 # Since formats may be filtered during format selection and may not match
2349 # the original formats the results may be incorrect. Thus original formats
2350 # or pre-calculated metrics should be passed to format selection routines
2351 # as well.
2352 # We will pass a context object containing all necessary additional data
2353 # instead of just formats.
2354 # This fixes incorrect format selection issue (see
067aa17e 2355 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2356 incomplete_formats = (
317f7ab6 2357 # All formats are video-only or
3089bc74 2358 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2359 # all formats are audio-only
3089bc74 2360 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2361
2362 ctx = {
2363 'formats': formats,
2364 'incomplete_formats': incomplete_formats,
2365 }
2366
2367 formats_to_download = list(format_selector(ctx))
dd82ffea 2368 if not formats_to_download:
b7da73eb 2369 if not self.params.get('ignore_no_formats_error'):
1151c407 2370 raise ExtractorError('Requested format is not available', expected=True,
2371 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2372 else:
2373 self.report_warning('Requested format is not available')
4513a41a
A
2374 # Process what we can, even without any available formats.
2375 self.process_info(dict(info_dict))
b7da73eb 2376 elif download:
2377 self.to_screen(
07cce701 2378 '[info] %s: Downloading %d format(s): %s' % (
2379 info_dict['id'], len(formats_to_download),
2380 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2381 for fmt in formats_to_download:
dd82ffea 2382 new_info = dict(info_dict)
4ec82a72 2383 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2384 new_info['__original_infodict'] = info_dict
b7da73eb 2385 new_info.update(fmt)
dd82ffea
JMF
2386 self.process_info(new_info)
2387 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2388 if formats_to_download:
2389 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2390 return info_dict
2391
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Choose which subtitle languages to fetch and one format per language.

    @param video_id             Id of the video; only used in warning messages
    @param normal_subtitles     Mapping of language -> list of subtitle formats
    @param automatic_captions   Mapping of language -> list of caption formats
    @returns                    Mapping of language -> chosen format dict, or
                                None when no subtitles were requested/available
    """
    params = self.params
    want_normal = params.get('writesubtitles')
    want_auto = params.get('writeautomaticsub')

    # Normal subtitles win over automatic captions of the same language
    candidates = {}
    if normal_subtitles and want_normal:
        candidates.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, captions in automatic_captions.items():
            candidates.setdefault(lang, captions)

    if not candidates or not (want_normal or want_auto):
        return None

    known_langs = candidates.keys()
    lang_patterns = params.get('subtitleslangs', False)
    if params.get('allsubtitles', False):
        wanted = known_langs
    elif lang_patterns:
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        wanted = []
        for pattern in lang_patterns:
            if pattern == 'all':
                wanted.extend(known_langs)
                continue
            # A leading "-" removes the matching languages instead of adding them
            negated = pattern[0] == '-'
            if negated:
                pattern = pattern[1:]
            matcher = re.compile(pattern + '$').match
            hits = [lang for lang in known_langs if matcher(lang)]
            if negated:
                wanted = [lang for lang in wanted if lang not in hits]
            else:
                wanted.extend(hits)
        wanted = orderedSet(wanted)
    elif 'en' in candidates:
        wanted = ['en']
    else:
        # Fall back to whichever language comes first
        wanted = [next(iter(known_langs))]
    if wanted:
        self.write_debug('Downloading subtitles: %s' % ', '.join(wanted))

    format_spec = params.get('subtitlesformat', 'best')
    ext_preference = format_spec.split('/') if format_spec else []
    selected = {}
    for lang in wanted:
        available = candidates.get(lang)
        if available is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in ext_preference:
            if ext == 'best':
                choice = available[-1]
                break
            same_ext = [fmt for fmt in available if fmt['ext'] == ext]
            if same_ext:
                choice = same_ext[-1]
                break
        else:
            # None of the preferred extensions exist; use the last listed format
            choice = available[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (format_spec, lang, choice['ext']))
        selected[lang] = choice
    return selected
2459
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields requested through the "force*" params to stdout.

    Works on a shallow copy of info_dict, so the 'filename' and 'urls'
    keys added here do not leak back to the caller.

    @param info_dict    The info dict to print from
    @param filename     Value for the 'filename' field; skipped if None
    @param incomplete   If true, mandatory fields that are missing are
                        silently skipped instead of raising KeyError
    """
    params = self.params
    info_dict = info_dict.copy()

    def emit(option, key=None, mandatory=False):
        # Print info_dict[key] when the "force<option>" param is set.
        # Mandatory fields are looked up unconditionally unless the
        # info dict is known to be incomplete.
        key = option if key is None else key
        if not params.get('force%s' % option, False):
            return
        if (mandatory and not incomplete) or info_dict.get(key) is not None:
            self.to_stdout(info_dict[key])

    if filename is not None:
        info_dict['filename'] = filename
    requested = info_dict.get('requested_formats')
    if requested is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(fmt['url'] + fmt.get('play_path', '') for fmt in requested)
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    if params.get('forceprint') or params.get('forcejson'):
        self.post_extract(info_dict)
    for tmpl in params.get('forceprint', []):
        # A bare field name ("title") or "title=" is shorthand for a full template
        simple_field = re.match(r'\w+(=?)$', tmpl)
        if simple_field:
            if simple_field.group(1):
                tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
            else:
                tmpl = '%({})s'.format(tmpl)
        self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

    emit('title', mandatory=True)
    emit('id', mandatory=True)
    emit('url', 'urls', mandatory=True)
    emit('thumbnail')
    emit('description')
    emit('filename')
    if params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    emit('format', mandatory=True)

    if params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2504
def dl(self, name, info, subtitle=False, test=False):
    """Run the suitable downloader for `info` and return its result.

    @param name       Target filename; '-' means stdout
    @param info       Info dict describing what to download
    @param subtitle   Passed through to the downloader's download()
    @param test       If true, use a fixed set of test params instead of
                      self.params and do not attach progress hooks
    @returns          Whatever the downloader's download() returns
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if test:
        verbose = self.params.get('verbose')
        fd_params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        fd_params = self.params

    downloader_cls = get_suitable_downloader(info, fd_params, to_stdout=(name == '-'))
    fd = downloader_cls(self, fd_params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)
        format_urls = [fmt['url'] for fmt in info.get('requested_formats', [])]
        urls = '", "'.join(format_urls or [info['url']])
        self.write_debug('Invoking downloader on "%s"' % urls)

    # The downloader gets its own deep copy of the info dict
    new_info = copy.deepcopy(self._copy_infodict(info))
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2535
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Prints the forced fields, writes the side files (description,
    subtitles, thumbnails, info json, annotations, internet shortcuts),
    then downloads the media (merging/fixing up through postprocessors
    where needed), runs the post hooks and records the download archive.
    Most failures are reported through report_error and end processing
    of this entry by returning early.

    @param info_dict    Info dict of a single video (`_type` must be
                        'video' or absent); it is modified in place
    @returns            None
    @raises MaxDownloadsReached     when the max_downloads limit is hit
    @raises UnavailableVideoError   when the download fails with an OSError
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Write one shortcut file next to the video; returns False only
        # when the file could not be written
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Keep an existing shortcut only when overwriting is disabled
        # (same rule as for the annotations file above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_file(*filepaths):
                # Return the first already-present file among filepaths (also
                # checking the final_ext variant), or None if there is none.
                # With overwrites enabled the found files are deleted instead.
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    # Swap a trailing old/new extension for `ext`; any other
                    # suffix is kept and `ext` is appended after it
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
                    # A single downloader can fetch all requested formats at once
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue ffmpeg fixup postprocessors (or only warn), depending
                # on the "fixup" param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None
                ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                             'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2905
def download(self, url_list):
    """Download a given list of URLs.

    @param url_list     The URLs to extract and download
    @returns            self._download_retcode
    @raises SameFileError   when several URLs would be written to one
                            fixed output filename
    """
    outtmpl = self.outtmpl_dict['default']
    # A template without any "%" field always produces the same filename
    fixed_filename = outtmpl != '-' and '%' not in outtmpl
    if len(url_list) > 1 and fixed_filename and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        force_generic = self.params.get('force_generic_extractor', False)
        try:
            # It also downloads the videos
            res = self.extract_info(url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloads reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
            raise

        if self.params.get('dump_single_json', False):
            self.post_extract(res)
            self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
2937
def download_with_info_file(self, info_filename):
    """Download using a previously written info json file.

    The file is loaded, sanitized and processed as an IE result. If that
    fails with a download error and the info carries a 'webpage_url', the
    URL is downloaded from scratch instead.

    @param info_filename    Path to the info json file
    @returns                The download return code
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        info = self.sanitize_info(loaded, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2954
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Sanitize the infodict for converting to json.

    ``info_dict`` gets an ``epoch`` entry added in place if it has none.
    The returned structure is a filtered copy in which lists, tuples, sets
    and LazyLists are converted to plain lists.

    @param remove_private_keys  When true, also drop keys starting with an
                                underscore (except ``_type``), a fixed set
                                of internal keys, and keys whose value is
                                empty (None, {}, [], set(), ()).
    @returns                    The filtered dict, or None if info_dict is None
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    # Always keep this to facilitate load-info-json.
    # NOTE: this must be a flat collection of key names; wrapping it in a
    # tuple (e.g. through a stray trailing comma) makes every membership
    # test fail and '_type' gets stripped together with the private keys
    keep_keys = {'_type'}
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        # Recurse through containers; anything else is returned untouched
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}

    return filter_fn(info_dict)
cb202fd2 2978
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility.

    ``actually_filter`` is forwarded as sanitize_info's
    ``remove_private_keys`` argument.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
2983
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it wants deleted.

    ``pp.run`` returns ``(files_to_delete, infodict)``. With ``keepvideo``
    set, those files are registered in ``infodict['__files_to_move']``
    instead of being deleted; otherwise they are removed from disk and from
    the move list. A PostProcessingError is reported rather than raised
    only when ``ignoreerrors`` is exactly True.

    Returns the (possibly replaced) infodict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(e)
        return infodict

    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        for kept_file in files_to_delete:
            infodict['__files_to_move'].setdefault(kept_file, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved afterwards
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict
5bfa4862 3012
@staticmethod
def post_extract(info_dict):
    """Run the deferred ``__post_extractor`` callbacks stored in an infodict.

    For playlists and multi_video results every entry is processed
    recursively. For any other result the callback's return value is merged
    into the dict (and into its ``__original_infodict`` copy, if present)
    and the callback is removed. Works in place; returns nothing.
    """
    def apply_to(result):
        if result.get('_type') in ('playlist', 'multi_video'):
            for entry in result.get('entries', {}):
                apply_to(entry or {})
            return

        callback = result.get('__post_extractor') or (lambda: {})
        extra_fields = callback().items()

        result.update(extra_fields)
        result.pop('__post_extractor', None)

        # Keep the untouched copy of the infodict in sync as well
        pristine = result.get('__original_infodict') or {}
        pristine.update(extra_fields)
        pristine.pop('__post_extractor', None)

    apply_to(info_dict or {})
277d6ff5 3031
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under ``key`` on a copy of ``ie_info``.

    Returns a tuple ``(info, files_to_move)`` where ``info`` no longer
    contains the internal ``__files_to_move`` entry.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
5bfa4862 3038
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the result's own ``__postprocessors`` followed by the registered
    'post_process' ones, then the file move step, then the 'after_move'
    ones. Returns the resulting info dict (a copy of ``ie_info`` with
    ``filepath`` set).
    """
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The move list has been consumed by the step above
    del info['__files_to_move']

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info
c1c9a79c 3052
5db07df6 3053 def _make_archive_id(self, info_dict):
e9fef7ee
S
3054 video_id = info_dict.get('id')
3055 if not video_id:
3056 return
5db07df6
PH
3057 # Future-proof against any change in case
3058 # and backwards compatibility with prior versions
e9fef7ee 3059 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3060 if extractor is None:
1211bb6d
S
3061 url = str_or_none(info_dict.get('url'))
3062 if not url:
3063 return
e9fef7ee 3064 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3065 for ie_key, ie in self._ies.items():
1211bb6d 3066 if ie.suitable(url):
8b7491c8 3067 extractor = ie_key
e9fef7ee
S
3068 break
3069 else:
3070 return
d0757229 3071 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3072
def in_download_archive(self, info_dict):
    """Return True if the entry is already recorded in the download archive.

    Always False when no ``download_archive`` is configured or when no
    archive id can be built for ``info_dict``.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information
c1c9a79c
PH
3083
def record_download_archive(self, info_dict):
    """Append the entry's archive id to the download archive.

    Does nothing when no ``download_archive`` file is configured. Otherwise
    the id is appended as one line to that file and added to the in-memory
    ``self.archive``.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3093
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Audio-only formats give 'audio only'; an explicit ``resolution`` field
    wins over width/height. Formats with neither video nor audio codec are
    labelled as images. ``default`` is returned when nothing is known.
    """
    vcodec, acodec = format.get('vcodec'), format.get('acodec')
    if vcodec == 'none' and acodec != 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    images_only = vcodec == 'none' and acodec == 'none'
    width, height = format.get('width'), format.get('height')
    if width and height:
        dimensions = '%dx%d' % (width, height)
    elif height:
        dimensions = '%sp' % height
    elif width:
        dimensions = '%dx?' % width
    else:
        return 'images' if images_only else default
    return f'{dimensions} images' if images_only else dimensions
8c51aa65 3112
c57f7757
PH
3113 def _format_note(self, fdict):
3114 res = ''
3115 if fdict.get('ext') in ['f4f', 'f4m']:
3116 res += '(unsupported) '
32f90364
PH
3117 if fdict.get('language'):
3118 if res:
3119 res += ' '
9016d76f 3120 res += '[%s] ' % fdict['language']
c57f7757
PH
3121 if fdict.get('format_note') is not None:
3122 res += fdict['format_note'] + ' '
3123 if fdict.get('tbr') is not None:
3124 res += '%4dk ' % fdict['tbr']
3125 if fdict.get('container') is not None:
3126 if res:
3127 res += ', '
3128 res += '%s container' % fdict['container']
3089bc74
S
3129 if (fdict.get('vcodec') is not None
3130 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3131 if res:
3132 res += ', '
3133 res += fdict['vcodec']
91c7271a 3134 if fdict.get('vbr') is not None:
c57f7757
PH
3135 res += '@'
3136 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3137 res += 'video@'
3138 if fdict.get('vbr') is not None:
3139 res += '%4dk' % fdict['vbr']
fbb21cf5 3140 if fdict.get('fps') is not None:
5d583bdf
S
3141 if res:
3142 res += ', '
3143 res += '%sfps' % fdict['fps']
c57f7757
PH
3144 if fdict.get('acodec') is not None:
3145 if res:
3146 res += ', '
3147 if fdict['acodec'] == 'none':
3148 res += 'video only'
3149 else:
3150 res += '%-5s' % fdict['acodec']
3151 elif fdict.get('abr') is not None:
3152 if res:
3153 res += ', '
3154 res += 'audio'
3155 if fdict.get('abr') is not None:
3156 res += '@%3dk' % fdict['abr']
3157 if fdict.get('asr') is not None:
3158 res += ' (%5dHz)' % fdict['asr']
3159 if fdict.get('filesize') is not None:
3160 if res:
3161 res += ', '
3162 res += format_bytes(fdict['filesize'])
9732d77e
PH
3163 elif fdict.get('filesize_approx') is not None:
3164 if res:
3165 res += ', '
3166 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3167 return res
91c7271a 3168
def list_formats(self, info_dict):
    """Print the table of available formats for ``info_dict``.

    Formats with a ``preference`` below -1000 are left out. The detailed
    column layout is used unless the 'list-formats' compat option is set or
    the ``listformats_table`` param is False, in which case the legacy
    four-column layout is printed.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)

    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        def make_row(f):
            more_info = ', '.join(filter(None, (
                'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                format_field(f, 'language', '[%s]'),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
            )))
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                more_info,
            ]

        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        def make_row(f):
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]

        header_line = ['format code', 'extension', 'resolution', 'note']

    table = [make_row(f) for f in listed]

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a
PH
3216
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the entry's thumbnails.

    When ``info_dict`` has no thumbnails (key missing, None or empty) a
    single informational message is printed instead.
    """
    # 'thumbnails' may be absent or explicitly None; list(None) would raise
    # TypeError before the "no thumbnails" message could be shown
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3228
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitle languages and formats.

    ``subtitles`` maps a language to a list of format dicts; ``name`` is
    only used in the printed messages. Prints a short notice and returns
    when there are no subtitles.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def table_row(lang, formats):
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [table_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(
        ['Language', 'Name', 'Formats'],
        rows,
        hideEmpty=True))
a504ced0 3246
dca08720
PH
def urlopen(self, req):
    """Start an HTTP download.

    ``req`` may be a URL string, which is first turned into a request via
    ``sanitized_Request``, or an already built request object. The request
    is opened with the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3252
def print_debug_header(self):
    """Write the verbose-mode debug header.

    Does nothing unless the ``verbose`` param is set. Otherwise reports
    encodings, program version, plugins, compat options, git HEAD (if
    available), Python and platform details, external program versions,
    optional libraries, ANSI escape support and the proxy map. With
    ``call_home`` set, the public IP address is also looked up and printed.

    Messages go to the ``logger`` param when one is configured and to
    ``self._write_string`` otherwise; each is prefixed with '[debug] '
    exactly once.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        return getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)

    # No '[debug] ' prefix here: it is added when the line is written, so
    # that it is not doubled when the line is sent to the logger
    encoding_str = 'Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        def write_debug(msg):
            self._write_string(f'[debug] {msg}')
        # The encodings line deliberately goes through write_string with
        # encoding=None instead of self._write_string
        write_string(f'[debug] {encoding_str}', encoding=None)

    source = detect_variant()
    write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
    if _LAZY_LOADER:
        write_debug('Lazy loading extractors enabled\n')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s\n' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_debug('Git HEAD: %s\n' % out)
    except Exception:
        # Best effort only: git may be missing or this may not be a checkout
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s\n' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = ', '.join(sorted(filter(None, (
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        has_websockets and 'websockets',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        KEYRING_AVAILABLE and 'keyring',
    )))) or 'none'
    write_debug('Optional libraries: %s\n' % lib_str)
    write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % (
        supports_terminal_sequences(self._screen_file),
        supports_terminal_sequences(self._err_file)))

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug('Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s\n' % ipaddr)
        # NOTE: the "latest version" check that used to follow was already
        # unreachable (it sat behind an unconditional return) and has been
        # dropped; no update check is performed here
3350
def _setup_opener(self):
    """Create the URL opener used for all requests and store it on self.

    Sets ``self._socket_timeout`` (20 unless ``socket_timeout`` is given),
    ``self.cookiejar`` and ``self._opener``. Proxies come from the
    ``proxy`` param (an empty string disables proxying) or, when that is
    unset, from the environment.
    """
    configured_timeout = self.params.get('socket_timeout')
    self._socket_timeout = 20 if configured_timeout is None else float(configured_timeout)

    browser_spec = self.params.get('cookiesfrombrowser')
    cookie_file = self.params.get('cookiefile')
    explicit_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(cookie_file, browser_spec, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if explicit_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies:
            proxies.setdefault('https', proxies['http'])
    elif explicit_proxy == '':
        proxies = {}
    else:
        proxies = {'http': explicit_proxy, 'https': explicit_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    traffic_debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=traffic_debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=traffic_debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler.file_open = refuse_file_scheme

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3398
def encode(self, s):
    """Encode ``s`` with the configured encoding.

    Bytes are returned unchanged. On failure the UnicodeEncodeError is
    re-raised with a hint about the encoding configuration appended to its
    reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
3408
def get_encoding(self):
    """Return the ``encoding`` param, or the result of preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3414
80c03fa9 3415 def _write_info_json(self, label, ie_result, infofn):
3416 ''' Write infojson and returns True = written, False = skip, None = error '''
3417 if not self.params.get('writeinfojson'):
3418 return False
3419 elif not infofn:
3420 self.write_debug(f'Skipping writing {label} infojson')
3421 return False
3422 elif not self._ensure_dir_exists(infofn):
3423 return None
3424 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3425 self.to_screen(f'[info] {label.title()} metadata is already present')
3426 else:
3427 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3428 try:
3429 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3430 except (OSError, IOError):
3431 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3432 return None
3433 return True
3434
3435 def _write_description(self, label, ie_result, descfn):
3436 ''' Write description and returns True = written, False = skip, None = error '''
3437 if not self.params.get('writedescription'):
3438 return False
3439 elif not descfn:
3440 self.write_debug(f'Skipping writing {label} description')
3441 return False
3442 elif not self._ensure_dir_exists(descfn):
3443 return None
3444 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3445 self.to_screen(f'[info] {label.title()} description is already present')
3446 elif ie_result.get('description') is None:
3447 self.report_warning(f'There\'s no {label} description to write')
3448 return False
3449 else:
3450 try:
3451 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3452 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3453 descfile.write(ie_result['description'])
3454 except (OSError, IOError):
3455 self.report_error(f'Cannot write {label} description file {descfn}')
3456 return None
3457 return True
3458
3459 def _write_subtitles(self, info_dict, filename):
3460 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3461 ret = []
3462 subtitles = info_dict.get('requested_subtitles')
3463 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3464 # subtitles download errors are already managed as troubles in relevant IE
3465 # that way it will silently go on when used with unsupporting IE
3466 return ret
3467
3468 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3469 if not sub_filename_base:
3470 self.to_screen('[info] Skipping writing video subtitles')
3471 return ret
3472 for sub_lang, sub_info in subtitles.items():
3473 sub_format = sub_info['ext']
3474 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3475 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3476 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3477 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3478 sub_info['filepath'] = sub_filename
3479 ret.append((sub_filename, sub_filename_final))
3480 continue
3481
3482 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3483 if sub_info.get('data') is not None:
3484 try:
3485 # Use newline='' to prevent conversion of newline characters
3486 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3487 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3488 subfile.write(sub_info['data'])
3489 sub_info['filepath'] = sub_filename
3490 ret.append((sub_filename, sub_filename_final))
3491 continue
3492 except (OSError, IOError):
3493 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3494 return None
3495
3496 try:
3497 sub_copy = sub_info.copy()
3498 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3499 self.dl(sub_filename, sub_copy, subtitle=True)
3500 sub_info['filepath'] = sub_filename
3501 ret.append((sub_filename, sub_filename_final))
3502 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3503 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3504 continue
519804a9 3505 return ret
80c03fa9 3506
3507 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3508 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3509 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3510 thumbnails, ret = [], []
6c4fd172 3511 if write_all or self.params.get('writethumbnail', False):
0202b52a 3512 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3513 multiple = write_all and len(thumbnails) > 1
ec82d85a 3514
80c03fa9 3515 if thumb_filename_base is None:
3516 thumb_filename_base = filename
3517 if thumbnails and not thumb_filename_base:
3518 self.write_debug(f'Skipping writing {label} thumbnail')
3519 return ret
3520
981052c9 3521 for t in thumbnails[::-1]:
80c03fa9 3522 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3523 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3524 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3525 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3526
80c03fa9 3527 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3528 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3529 t['filepath'] = thumb_filename
80c03fa9 3530 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
ec82d85a 3531 else:
80c03fa9 3532 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a
PH
3533 try:
3534 uf = self.urlopen(t['url'])
80c03fa9 3535 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3536 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3537 shutil.copyfileobj(uf, thumbf)
80c03fa9 3538 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3539 t['filepath'] = thumb_filename
3158150c 3540 except network_exceptions as err:
80c03fa9 3541 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3542 if ret and not write_all:
3543 break
0202b52a 3544 return ret