#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request

from string import ascii_letters

from .cache import Cache
from .compat import (
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import _LAZY_LOADER
from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    InAdvancePagedList,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well (deprecated)
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

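    # Illustrative usage sketch (not part of the original file): the options above
    # are passed as a plain dict. A minimal embedding might look like the following
    # (the URL and option values here are only example values):
    #
    #     from yt_dlp import YoutubeDL
    #     with YoutubeDL({'format': 'bestvideo+bestaudio/best',
    #                     'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'}}) as ydl:
    #         ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
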
    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        self._out_files = {
            'error': sys.stderr,
            'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
            'console': None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        }
        self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
        self._allow_colors = {
            type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
            for type_ in ('screen', 'error')
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._out_files['error'])
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list. If there's no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files['print'])

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files['screen'])

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files['console']:
            return
        self._write_string(code, self._out_files['console'])

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_screen(self, *args, **kwargs):
        return self._format_text(
            self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(
            self._out_files['error'], self._allow_colors['error'], *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for _, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

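    # Illustrative sketch (hypothetical values): with params
    # {'paths': {'home': '/data/videos', 'temp': 'tmp'}}, a call like
    # get_output_path('temp', 'clip.mp4') joins the home directory, the 'temp'
    # subdirectory and the filename into '/data/videos/tmp/clip.mp4' on a POSIX
    # system (after expand_path/sanitize_path).
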
    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

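    # Illustrative sketch (assumed behaviour, per the docstring above): a stray '%'
    # that is not part of a '%(field)...' template gets doubled so that the later
    # '%'-substitution step does not choke on it, e.g.
    #     escape_outtmpl('%(title)s - 100% speed')  ->  '%(title)s - 100%% speed'
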
    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params.get('compat_opts', [])
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

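    # Illustrative sketch (hypothetical info_dict): evaluate_outtmpl ties the two
    # steps above together, e.g.
    #     ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
    #                          {'title': 'Example', 'id': 'xyz123', 'ext': 'mp4'})
    # yields 'Example [xyz123].mp4'; fields missing from info_dict are replaced by
    # params['outtmpl_na_placeholder'] ('NA' by default).
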
5127e92a 1221 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1222 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1223 if outtmpl is None:
1224 outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
8222d8de 1225 try:
5127e92a 1226 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1227 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1228 if not filename:
1229 return None
15da37c7 1230
5127e92a 1231 if tmpl_type in ('', 'temp'):
6a0546e3 1232 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1233 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1234 filename = replace_extension(filename, ext, final_ext)
5127e92a 1235 elif tmpl_type:
6a0546e3 1236 force_ext = OUTTMPL_TYPES[tmpl_type]
1237 if force_ext:
1238 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1239
bdc3fd2f
U
1240 # https://github.com/blackjack4494/youtube-dlc/issues/85
1241 trim_file_name = self.params.get('trim_file_name', False)
1242 if trim_file_name:
5c22c63d 1243 no_ext, *ext = filename.rsplit('.', 2)
1244 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1245
0202b52a 1246 return filename
8222d8de 1247 except ValueError as err:
6febd1c1 1248 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1249 return None
1250
5127e92a 1251 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1252 """Generate the output filename"""
1253 if outtmpl:
1254 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1255 dir_type = None
1256 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1257 if not filename and dir_type not in ('', 'temp'):
1258 return ''
de6000d9 1259
c84aeac6 1260 if warn:
21cd8fae 1261 if not self.params.get('paths'):
de6000d9 1262 pass
1263 elif filename == '-':
c84aeac6 1264 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1265 elif os.path.isabs(filename):
c84aeac6 1266 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1267 if filename == '-' or not filename:
1268 return filename
1269
21cd8fae 1270 return self.get_output_path(dir_type, filename)
0202b52a 1271
120fe513 1272 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1273 """ Returns None if the file should be downloaded """
8222d8de 1274
c77495e3 1275 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1276
8b0d7497 1277 def check_filter():
8b0d7497 1278 if 'title' in info_dict:
1279 # This can happen when we're just evaluating the playlist
1280 title = info_dict['title']
1281 matchtitle = self.params.get('matchtitle', False)
1282 if matchtitle:
1283 if not re.search(matchtitle, title, re.IGNORECASE):
1284 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1285 rejecttitle = self.params.get('rejecttitle', False)
1286 if rejecttitle:
1287 if re.search(rejecttitle, title, re.IGNORECASE):
1288 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1289 date = info_dict.get('upload_date')
1290 if date is not None:
1291 dateRange = self.params.get('daterange', DateRange())
1292 if date not in dateRange:
86e5f3ed 1293 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1294 view_count = info_dict.get('view_count')
1295 if view_count is not None:
1296 min_views = self.params.get('min_views')
1297 if min_views is not None and view_count < min_views:
1298 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1299 max_views = self.params.get('max_views')
1300 if max_views is not None and view_count > max_views:
1301 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1302 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1303 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1304
8f18aca8 1305 match_filter = self.params.get('match_filter')
1306 if match_filter is not None:
1307 try:
1308 ret = match_filter(info_dict, incomplete=incomplete)
1309 except TypeError:
1310 # For backward compatibility
1311 ret = None if incomplete else match_filter(info_dict)
492272fe 1312 if ret is NO_DEFAULT:
1313 while True:
1314 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1315 reply = input(self._format_screen(
1316 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1317 if reply in {'y', ''}:
1318 return None
1319 elif reply == 'n':
1320 return f'Skipping {video_title}'
1321 return True
1322 elif ret is not None:
8f18aca8 1323 return ret
8b0d7497 1324 return None
1325
c77495e3 1326 if self.in_download_archive(info_dict):
1327 reason = '%s has already been recorded in the archive' % video_title
1328 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1329 else:
1330 reason = check_filter()
1331 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1332 if reason is not None:
120fe513 1333 if not silent:
1334 self.to_screen('[download] ' + reason)
c77495e3 1335 if self.params.get(break_opt, False):
1336 raise break_err()
8b0d7497 1337 return reason
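    # Illustrative sketch (not part of the original file): 'match_filter' in params may be
    # a callable. Returning None accepts the video, returning a string skips it with that
    # reason, and returning NO_DEFAULT triggers the interactive prompt handled above.
    #
    #   def only_short_videos(info_dict, *, incomplete=False):
    #       if (info_dict.get('duration') or 0) > 600:
    #           return 'Skipping videos longer than 10 minutes'
    #       return None  # download it
    #
    #   ydl = YoutubeDL({'match_filter': only_short_videos})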
fe7e0c98 1338
b6c45014
JMF
1339 @staticmethod
1340 def add_extra_info(info_dict, extra_info):
1341 '''Set the keys from extra_info in info dict if they are missing'''
1342 for key, value in extra_info.items():
1343 info_dict.setdefault(key, value)
1344
409e1828 1345 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1346 process=True, force_generic_extractor=False):
41d1cca3 1347 """
 1348 Extract and return the information dictionary of the URL.
1349
1350 Arguments:
1351 url -- URL to extract
1352
1353 Keyword arguments:
1354 download -- whether to download videos during extraction
1355 ie_key -- extractor key hint
1356 extra_info -- dictionary containing the extra values to add to each result
1357 process -- whether to resolve all unresolved references (URLs, playlist items),
1358 must be True for download to work.
1359 force_generic_extractor -- force using the generic extractor
1360 """
fe7e0c98 1361
409e1828 1362 if extra_info is None:
1363 extra_info = {}
1364
61aa5ba3 1365 if not ie_key and force_generic_extractor:
d22dec74
S
1366 ie_key = 'Generic'
1367
8222d8de 1368 if ie_key:
8b7491c8 1369 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1370 else:
1371 ies = self._ies
1372
8b7491c8 1373 for ie_key, ie in ies.items():
8222d8de
JMF
1374 if not ie.suitable(url):
1375 continue
1376
1377 if not ie.working():
6febd1c1
PH
1378 self.report_warning('The program functionality for this site has been marked as broken, '
1379 'and will probably not work.')
8222d8de 1380
1151c407 1381 temp_id = ie.get_temp_id(url)
a0566bbf 1382 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1383 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1384 if self.params.get('break_on_existing', False):
1385 raise ExistingVideoReached()
a0566bbf 1386 break
8b7491c8 1387 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1388 else:
1389 self.report_error('no suitable InfoExtractor for URL %s' % url)
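    # Illustrative sketch (not part of the original file) of typical embedding use,
    # assuming a valid `url`:
    #
    #   with YoutubeDL({'quiet': True}) as ydl:
    #       info = ydl.extract_info(url, download=False)  # metadata only
    #       print(info.get('title'))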
1390
8e5fecc8 1391 def __handle_extraction_exceptions(func):
b5ae35ee 1392 @functools.wraps(func)
a0566bbf 1393 def wrapper(self, *args, **kwargs):
6da22e7d 1394 while True:
1395 try:
1396 return func(self, *args, **kwargs)
1397 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1398 raise
6da22e7d 1399 except ReExtractInfo as e:
1400 if e.expected:
1401 self.to_screen(f'{e}; Re-extracting data')
1402 else:
1403 self.to_stderr('\r')
1404 self.report_warning(f'{e}; Re-extracting data')
1405 continue
1406 except GeoRestrictedError as e:
1407 msg = e.msg
1408 if e.countries:
1409 msg += '\nThis video is available in %s.' % ', '.join(
1410 map(ISO3166Utils.short2full, e.countries))
 1411 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1412 self.report_error(msg)
1413 except ExtractorError as e: # An error we somewhat expected
1414 self.report_error(str(e), e.format_traceback())
1415 except Exception as e:
1416 if self.params.get('ignoreerrors'):
1417 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1418 else:
1419 raise
1420 break
a0566bbf 1421 return wrapper
1422
f2ebc5c7 1423 def _wait_for_video(self, ie_result):
1424 if (not self.params.get('wait_for_video')
1425 or ie_result.get('_type', 'video') != 'video'
1426 or ie_result.get('formats') or ie_result.get('url')):
1427 return
1428
1429 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1430 last_msg = ''
1431
1432 def progress(msg):
1433 nonlocal last_msg
1434 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1435 last_msg = msg
1436
1437 min_wait, max_wait = self.params.get('wait_for_video')
1438 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1439 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1440 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1441 self.report_warning('Release time of video is not known')
1442 elif (diff or 0) <= 0:
1443 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1444 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1445 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1446
1447 wait_till = time.time() + diff
1448 try:
1449 while True:
1450 diff = wait_till - time.time()
1451 if diff <= 0:
1452 progress('')
1453 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1454 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1455 time.sleep(1)
1456 except KeyboardInterrupt:
1457 progress('')
1458 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1459 except BaseException as e:
1460 if not isinstance(e, ReExtractInfo):
1461 self.to_screen('')
1462 raise
1463
a0566bbf 1464 @__handle_extraction_exceptions
58f197b7 1465 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1466 ie_result = ie.extract(url)
1467 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1468 return
1469 if isinstance(ie_result, list):
1470 # Backwards compatibility: old IE result format
1471 ie_result = {
1472 '_type': 'compat_list',
1473 'entries': ie_result,
1474 }
e37d0efb 1475 if extra_info.get('original_url'):
1476 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1477 self.add_default_extra_info(ie_result, ie, url)
1478 if process:
f2ebc5c7 1479 self._wait_for_video(ie_result)
a0566bbf 1480 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1481 else:
a0566bbf 1482 return ie_result
fe7e0c98 1483
ea38e55f 1484 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1485 if url is not None:
1486 self.add_extra_info(ie_result, {
1487 'webpage_url': url,
1488 'original_url': url,
57ebfca3 1489 })
1490 webpage_url = ie_result.get('webpage_url')
1491 if webpage_url:
1492 self.add_extra_info(ie_result, {
1493 'webpage_url_basename': url_basename(webpage_url),
1494 'webpage_url_domain': get_domain(webpage_url),
6033d980 1495 })
1496 if ie is not None:
1497 self.add_extra_info(ie_result, {
1498 'extractor': ie.IE_NAME,
1499 'extractor_key': ie.ie_key(),
1500 })
ea38e55f 1501
58adec46 1502 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1503 """
 1504 Take the result of the ie (may be modified) and resolve all unresolved
1505 references (URLs, playlist items).
1506
 1507 It will also download the videos if 'download' is true.
1508 Returns the resolved ie_result.
1509 """
58adec46 1510 if extra_info is None:
1511 extra_info = {}
e8ee972c
PH
1512 result_type = ie_result.get('_type', 'video')
1513
057a5206 1514 if result_type in ('url', 'url_transparent'):
134c6ea8 1515 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1516 if ie_result.get('original_url'):
1517 extra_info.setdefault('original_url', ie_result['original_url'])
1518
057a5206 1519 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1520 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1521 or extract_flat is True):
ecb54191 1522 info_copy = ie_result.copy()
6033d980 1523 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1524 if ie and not ie_result.get('id'):
4614bc22 1525 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1526 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1527 self.add_extra_info(info_copy, extra_info)
b5475f11 1528 info_copy, _ = self.pre_process(info_copy)
ecb54191 1529 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1530 if self.params.get('force_write_download_archive', False):
1531 self.record_download_archive(info_copy)
e8ee972c
PH
1532 return ie_result
1533
8222d8de 1534 if result_type == 'video':
b6c45014 1535 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1536 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1537 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1538 if additional_urls:
e9f4ccd1 1539 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1540 if isinstance(additional_urls, compat_str):
1541 additional_urls = [additional_urls]
1542 self.to_screen(
1543 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1544 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1545 ie_result['additional_entries'] = [
1546 self.extract_info(
b69fd25c 1547 url, download, extra_info=extra_info,
9c2b75b5 1548 force_generic_extractor=self.params.get('force_generic_extractor'))
1549 for url in additional_urls
1550 ]
1551 return ie_result
8222d8de
JMF
1552 elif result_type == 'url':
1553 # We have to add extra_info to the results because it may be
1554 # contained in a playlist
07cce701 1555 return self.extract_info(
1556 ie_result['url'], download,
1557 ie_key=ie_result.get('ie_key'),
1558 extra_info=extra_info)
7fc3fa05
PH
1559 elif result_type == 'url_transparent':
1560 # Use the information from the embedding page
1561 info = self.extract_info(
1562 ie_result['url'], ie_key=ie_result.get('ie_key'),
1563 extra_info=extra_info, download=False, process=False)
1564
1640eb09
S
1565 # extract_info may return None when ignoreerrors is enabled and
1566 # extraction failed with an error, don't crash and return early
1567 # in this case
1568 if not info:
1569 return info
1570
412c617d 1571 new_result = info.copy()
90137ca4 1572 new_result.update(filter_dict(ie_result, lambda k, v: (
1573 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
7fc3fa05 1574
0563f7ac
S
1575 # Extracted info may not be a video result (i.e.
 1576 # info.get('_type', 'video') != 'video') but rather a URL or
1577 # url_transparent. In such cases outer metadata (from ie_result)
1578 # should be propagated to inner one (info). For this to happen
1579 # _type of info should be overridden with url_transparent. This
067aa17e 1580 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1581 if new_result.get('_type') == 'url':
1582 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1583
1584 return self.process_ie_result(
1585 new_result, download=download, extra_info=extra_info)
40fcba5e 1586 elif result_type in ('playlist', 'multi_video'):
30a074c2 1587 # Protect from infinite recursion due to recursively nested playlists
1588 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1589 webpage_url = ie_result['webpage_url']
1590 if webpage_url in self._playlist_urls:
7e85e872 1591 self.to_screen(
30a074c2 1592 '[download] Skipping already downloaded playlist: %s'
 1593 % (ie_result.get('title') or ie_result.get('id')))
1594 return
7e85e872 1595
30a074c2 1596 self._playlist_level += 1
1597 self._playlist_urls.add(webpage_url)
03f83004 1598 self._fill_common_fields(ie_result, False)
bc516a3f 1599 self._sanitize_thumbnails(ie_result)
30a074c2 1600 try:
1601 return self.__process_playlist(ie_result, download)
1602 finally:
1603 self._playlist_level -= 1
1604 if not self._playlist_level:
1605 self._playlist_urls.clear()
8222d8de 1606 elif result_type == 'compat_list':
c9bf4114
PH
1607 self.report_warning(
1608 'Extractor %s returned a compat_list result. '
1609 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1610
8222d8de 1611 def _fixup(r):
b868936c 1612 self.add_extra_info(r, {
1613 'extractor': ie_result['extractor'],
1614 'webpage_url': ie_result['webpage_url'],
1615 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1616 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1617 'extractor_key': ie_result['extractor_key'],
1618 })
8222d8de
JMF
1619 return r
1620 ie_result['entries'] = [
b6c45014 1621 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1622 for r in ie_result['entries']
1623 ]
1624 return ie_result
1625 else:
1626 raise Exception('Invalid result type: %s' % result_type)
1627
e92caff5 1628 def _ensure_dir_exists(self, path):
1629 return make_dir(path, self.report_error)
1630
3b603dbd 1631 @staticmethod
1632 def _playlist_infodict(ie_result, **kwargs):
1633 return {
1634 **ie_result,
1635 'playlist': ie_result.get('title') or ie_result.get('id'),
1636 'playlist_id': ie_result.get('id'),
1637 'playlist_title': ie_result.get('title'),
1638 'playlist_uploader': ie_result.get('uploader'),
1639 'playlist_uploader_id': ie_result.get('uploader_id'),
1640 'playlist_index': 0,
1641 **kwargs,
1642 }
1643
30a074c2 1644 def __process_playlist(self, ie_result, download):
1645 # We process each entry in the playlist
1646 playlist = ie_result.get('title') or ie_result.get('id')
1647 self.to_screen('[download] Downloading playlist: %s' % playlist)
1648
498f5606 1649 if 'entries' not in ie_result:
aa9369a2 1650 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1651
1652 MissingEntry = object()
498f5606 1653 incomplete_entries = bool(ie_result.get('requested_entries'))
1654 if incomplete_entries:
bf5f605e 1655 def fill_missing_entries(entries, indices):
7c7f7161 1656 ret = [MissingEntry] * max(indices)
bf5f605e 1657 for i, entry in zip(indices, entries):
498f5606 1658 ret[i - 1] = entry
1659 return ret
1660 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1661
30a074c2 1662 playlist_results = []
1663
56a8fb4f 1664 playliststart = self.params.get('playliststart', 1)
30a074c2 1665 playlistend = self.params.get('playlistend')
1666 # For backwards compatibility, interpret -1 as whole list
1667 if playlistend == -1:
1668 playlistend = None
1669
1670 playlistitems_str = self.params.get('playlist_items')
1671 playlistitems = None
1672 if playlistitems_str is not None:
1673 def iter_playlistitems(format):
1674 for string_segment in format.split(','):
1675 if '-' in string_segment:
1676 start, end = string_segment.split('-')
1677 for item in range(int(start), int(end) + 1):
1678 yield int(item)
1679 else:
1680 yield int(string_segment)
1681 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
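            # Descriptive note (not in the original): e.g. a 'playlist_items' value of
            # '1-3,7' yields [1, 2, 3, 7] after orderedSet de-duplication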
1682
1683 ie_entries = ie_result['entries']
8e5fecc8 1684 if isinstance(ie_entries, list):
ed8d87f9 1685 playlist_count = len(ie_entries)
f0d785d3 1686 msg = f'Collected {playlist_count} videos; downloading %d of them'
1687 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1688
8e5fecc8 1689 def get_entry(i):
1690 return ie_entries[i - 1]
1691 else:
f0d785d3 1692 msg = 'Downloading %d videos'
c586f9e8 1693 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1694 ie_entries = LazyList(ie_entries)
d37707bd 1695 elif isinstance(ie_entries, InAdvancePagedList):
1696 if ie_entries._pagesize == 1:
1697 playlist_count = ie_entries._pagecount
8e5fecc8 1698
1699 def get_entry(i):
1700 return YoutubeDL.__handle_extraction_exceptions(
1701 lambda self, i: ie_entries[i - 1]
1702 )(self, i)
50fed816 1703
f0d785d3 1704 entries, broken = [], False
ff1c7fc9 1705 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1706 for i in items:
1707 if i == 0:
1708 continue
56a8fb4f 1709 if playlistitems is None and playlistend is not None and playlistend < i:
1710 break
1711 entry = None
1712 try:
50fed816 1713 entry = get_entry(i)
7c7f7161 1714 if entry is MissingEntry:
498f5606 1715 raise EntryNotInPlaylist()
56a8fb4f 1716 except (IndexError, EntryNotInPlaylist):
1717 if incomplete_entries:
aa9369a2 1718 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1719 elif not playlistitems:
1720 break
1721 entries.append(entry)
120fe513 1722 try:
1723 if entry is not None:
e5a998f3 1724 # TODO: Add auto-generated fields
120fe513 1725 self._match_entry(entry, incomplete=True, silent=True)
1726 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1727 broken = True
120fe513 1728 break
56a8fb4f 1729 ie_result['entries'] = entries
30a074c2 1730
56a8fb4f 1731 # Save playlist_index before re-ordering
1732 entries = [
9e598870 1733 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1734 for i, entry in enumerate(entries, 1)
1735 if entry is not None]
1736 n_entries = len(entries)
498f5606 1737
f0d785d3 1738 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1739 ie_result['playlist_count'] = n_entries
1740
e08a85d8 1741 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1742 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1743 ie_result['requested_entries'] = playlistitems
1744
e08a85d8 1745 _infojson_written = False
0bfc53d0 1746 write_playlist_files = self.params.get('allow_playlist_files', True)
1747 if write_playlist_files and self.params.get('list_thumbnails'):
1748 self.list_thumbnails(ie_result)
1749 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1750 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1751 _infojson_written = self._write_info_json(
1752 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1753 if _infojson_written is None:
80c03fa9 1754 return
1755 if self._write_description('playlist', ie_result,
1756 self.prepare_filename(ie_copy, 'pl_description')) is None:
1757 return
681de68e 1758 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1759 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1760
1761 if self.params.get('playlistreverse', False):
1762 entries = entries[::-1]
30a074c2 1763 if self.params.get('playlistrandom', False):
1764 random.shuffle(entries)
1765
1766 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1767
86e5f3ed 1768 self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
26e2805c 1769 failures = 0
1770 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1771 for i, entry_tuple in enumerate(entries, 1):
1772 playlist_index, entry = entry_tuple
81139999 1773 if 'playlist-index' in self.params.get('compat_opts', []):
1774 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
19a03940 1775 self.to_screen('[download] Downloading video %s of %s' % (
1776 self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
30a074c2 1777 # This __x_forwarded_for_ip thing is a bit ugly but requires
1778 # minimal changes
1779 if x_forwarded_for:
1780 entry['__x_forwarded_for_ip'] = x_forwarded_for
1781 extra = {
1782 'n_entries': n_entries,
0a5a191a 1783 '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1784 'playlist_count': ie_result.get('playlist_count'),
71729754 1785 'playlist_index': playlist_index,
1786 'playlist_autonumber': i,
30a074c2 1787 'playlist': playlist,
1788 'playlist_id': ie_result.get('id'),
1789 'playlist_title': ie_result.get('title'),
1790 'playlist_uploader': ie_result.get('uploader'),
1791 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1792 'extractor': ie_result['extractor'],
1793 'webpage_url': ie_result['webpage_url'],
1794 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1795 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1796 'extractor_key': ie_result['extractor_key'],
1797 }
1798
1799 if self._match_entry(entry, incomplete=True) is not None:
1800 continue
1801
1802 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1803 if not entry_result:
1804 failures += 1
1805 if failures >= max_failures:
1806 self.report_error(
1807 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1808 break
30a074c2 1809 playlist_results.append(entry_result)
1810 ie_result['entries'] = playlist_results
e08a85d8 1811
1812 # Write the updated info to json
cb96c5be 1813 if _infojson_written is True and self._write_info_json(
e08a85d8 1814 'updated playlist', ie_result,
1815 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1816 return
ca30f449 1817
ed5835b4 1818 ie_result = self.run_all_pps('playlist', ie_result)
1819 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1820 return ie_result
1821
a0566bbf 1822 @__handle_extraction_exceptions
1823 def __process_iterable_entry(self, entry, download, extra_info):
1824 return self.process_ie_result(
1825 entry, download=download, extra_info=extra_info)
1826
67134eab
JMF
1827 def _build_format_filter(self, filter_spec):
1828 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1829
1830 OPERATORS = {
1831 '<': operator.lt,
1832 '<=': operator.le,
1833 '>': operator.gt,
1834 '>=': operator.ge,
1835 '=': operator.eq,
1836 '!=': operator.ne,
1837 }
67134eab 1838 operator_rex = re.compile(r'''(?x)\s*
187986a8 1839 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1840 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1841 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1842 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1843 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1844 if m:
1845 try:
1846 comparison_value = int(m.group('value'))
1847 except ValueError:
1848 comparison_value = parse_filesize(m.group('value'))
1849 if comparison_value is None:
1850 comparison_value = parse_filesize(m.group('value') + 'B')
1851 if comparison_value is None:
1852 raise ValueError(
1853 'Invalid value %r in format specification %r' % (
67134eab 1854 m.group('value'), filter_spec))
9ddb6925
S
1855 op = OPERATORS[m.group('op')]
1856
083c9df9 1857 if not m:
9ddb6925
S
1858 STR_OPERATORS = {
1859 '=': operator.eq,
10d33b34
YCH
1860 '^=': lambda attr, value: attr.startswith(value),
1861 '$=': lambda attr, value: attr.endswith(value),
1862 '*=': lambda attr, value: value in attr,
1ce9a3cb 1863 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1864 }
187986a8 1865 str_operator_rex = re.compile(r'''(?x)\s*
1866 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1867 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1868 (?P<quote>["'])?
1869 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1870 (?(quote)(?P=quote))\s*
9ddb6925 1871 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1872 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1873 if m:
1ce9a3cb
LF
1874 if m.group('op') == '~=':
1875 comparison_value = re.compile(m.group('value'))
1876 else:
1877 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1878 str_op = STR_OPERATORS[m.group('op')]
1879 if m.group('negation'):
e118a879 1880 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1881 else:
1882 op = str_op
083c9df9 1883
9ddb6925 1884 if not m:
187986a8 1885 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1886
1887 def _filter(f):
1888 actual_value = f.get(m.group('key'))
1889 if actual_value is None:
1890 return m.group('none_inclusive')
1891 return op(actual_value, comparison_value)
67134eab
JMF
1892 return _filter
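    # Illustrative sketch (not part of the original file): filter specs accepted by the
    # regexes above (normally written inside [...] in a format selector):
    #
    #   self._build_format_filter('height<=480')           # numeric comparison
    #   self._build_format_filter('filesize>100M')         # falls back to parse_filesize
    #   self._build_format_filter('ext=mp4')                # string equality
    #   self._build_format_filter('format_note*=premium')   # substring match
    #   self._build_format_filter('vcodec!^=av01')          # negated prefix match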
1893
9f1a1c36 1894 def _check_formats(self, formats):
1895 for f in formats:
1896 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1897 path = self.get_output_path('temp')
1898 if not self._ensure_dir_exists(f'{path}/'):
1899 continue
1900 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1901 temp_file.close()
1902 try:
1903 success, _ = self.dl(temp_file.name, f, test=True)
1904 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1905 success = False
1906 finally:
1907 if os.path.exists(temp_file.name):
1908 try:
1909 os.remove(temp_file.name)
1910 except OSError:
1911 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1912 if success:
1913 yield f
1914 else:
1915 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1916
0017d9ad 1917 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1918
af0f7428
S
1919 def can_merge():
1920 merger = FFmpegMergerPP(self)
1921 return merger.available and merger.can_merge()
1922
91ebc640 1923 prefer_best = (
b7b04c78 1924 not self.params.get('simulate')
91ebc640 1925 and download
1926 and (
1927 not can_merge()
19807826 1928 or info_dict.get('is_live', False)
de6000d9 1929 or self.outtmpl_dict['default'] == '-'))
53ed7066 1930 compat = (
1931 prefer_best
1932 or self.params.get('allow_multiple_audio_streams', False)
1933 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1934
1935 return (
53ed7066 1936 'best/bestvideo+bestaudio' if prefer_best
1937 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1938 else 'bestvideo+bestaudio/best')
0017d9ad 1939
67134eab
JMF
1940 def build_format_selector(self, format_spec):
1941 def syntax_error(note, start):
1942 message = (
1943 'Invalid format specification: '
86e5f3ed 1944 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
1945 return SyntaxError(message)
1946
1947 PICKFIRST = 'PICKFIRST'
1948 MERGE = 'MERGE'
1949 SINGLE = 'SINGLE'
0130afb7 1950 GROUP = 'GROUP'
67134eab
JMF
1951 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1952
91ebc640 1953 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1954 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1955
9f1a1c36 1956 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1957
67134eab
JMF
1958 def _parse_filter(tokens):
1959 filter_parts = []
1960 for type, string, start, _, _ in tokens:
1961 if type == tokenize.OP and string == ']':
1962 return ''.join(filter_parts)
1963 else:
1964 filter_parts.append(string)
1965
232541df 1966 def _remove_unused_ops(tokens):
17cc1534 1967 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1968 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1969 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1970 last_string, last_start, last_end, last_line = None, None, None, None
1971 for type, string, start, end, line in tokens:
1972 if type == tokenize.OP and string == '[':
1973 if last_string:
1974 yield tokenize.NAME, last_string, last_start, last_end, last_line
1975 last_string = None
1976 yield type, string, start, end, line
1977 # everything inside brackets will be handled by _parse_filter
1978 for type, string, start, end, line in tokens:
1979 yield type, string, start, end, line
1980 if type == tokenize.OP and string == ']':
1981 break
1982 elif type == tokenize.OP and string in ALLOWED_OPS:
1983 if last_string:
1984 yield tokenize.NAME, last_string, last_start, last_end, last_line
1985 last_string = None
1986 yield type, string, start, end, line
1987 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1988 if not last_string:
1989 last_string = string
1990 last_start = start
1991 last_end = end
1992 else:
1993 last_string += string
1994 if last_string:
1995 yield tokenize.NAME, last_string, last_start, last_end, last_line
1996
cf2ac6df 1997 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1998 selectors = []
1999 current_selector = None
2000 for type, string, start, _, _ in tokens:
2001 # ENCODING is only defined in python 3.x
2002 if type == getattr(tokenize, 'ENCODING', None):
2003 continue
2004 elif type in [tokenize.NAME, tokenize.NUMBER]:
2005 current_selector = FormatSelector(SINGLE, string, [])
2006 elif type == tokenize.OP:
cf2ac6df
JMF
2007 if string == ')':
2008 if not inside_group:
2009 # ')' will be handled by the parentheses group
2010 tokens.restore_last_token()
67134eab 2011 break
cf2ac6df 2012 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2013 tokens.restore_last_token()
2014 break
cf2ac6df
JMF
2015 elif inside_choice and string == ',':
2016 tokens.restore_last_token()
2017 break
2018 elif string == ',':
0a31a350
JMF
2019 if not current_selector:
2020 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2021 selectors.append(current_selector)
2022 current_selector = None
2023 elif string == '/':
d96d604e
JMF
2024 if not current_selector:
2025 raise syntax_error('"/" must follow a format selector', start)
67134eab 2026 first_choice = current_selector
cf2ac6df 2027 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2028 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2029 elif string == '[':
2030 if not current_selector:
2031 current_selector = FormatSelector(SINGLE, 'best', [])
2032 format_filter = _parse_filter(tokens)
2033 current_selector.filters.append(format_filter)
0130afb7
JMF
2034 elif string == '(':
2035 if current_selector:
2036 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2037 group = _parse_format_selection(tokens, inside_group=True)
2038 current_selector = FormatSelector(GROUP, group, [])
67134eab 2039 elif string == '+':
d03cfdce 2040 if not current_selector:
2041 raise syntax_error('Unexpected "+"', start)
2042 selector_1 = current_selector
2043 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2044 if not selector_2:
2045 raise syntax_error('Expected a selector', start)
2046 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2047 else:
86e5f3ed 2048 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2049 elif type == tokenize.ENDMARKER:
2050 break
2051 if current_selector:
2052 selectors.append(current_selector)
2053 return selectors
2054
f8d4ad9a 2055 def _merge(formats_pair):
2056 format_1, format_2 = formats_pair
2057
2058 formats_info = []
2059 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2060 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2061
2062 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2063 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2064 for (i, fmt_info) in enumerate(formats_info):
551f9388 2065 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2066 formats_info.pop(i)
2067 continue
2068 for aud_vid in ['audio', 'video']:
f8d4ad9a 2069 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2070 if get_no_more[aud_vid]:
2071 formats_info.pop(i)
f5510afe 2072 break
f8d4ad9a 2073 get_no_more[aud_vid] = True
2074
2075 if len(formats_info) == 1:
2076 return formats_info[0]
2077
2078 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2079 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2080
2081 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2082 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2083
2084 output_ext = self.params.get('merge_output_format')
2085 if not output_ext:
2086 if the_only_video:
2087 output_ext = the_only_video['ext']
2088 elif the_only_audio and not video_fmts:
2089 output_ext = the_only_audio['ext']
2090 else:
2091 output_ext = 'mkv'
2092
975a0d0d 2093 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2094
f8d4ad9a 2095 new_dict = {
2096 'requested_formats': formats_info,
975a0d0d 2097 'format': '+'.join(filtered('format')),
2098 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2099 'ext': output_ext,
975a0d0d 2100 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2101 'language': '+'.join(orderedSet(filtered('language'))) or None,
2102 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2103 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2104 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2105 }
2106
2107 if the_only_video:
2108 new_dict.update({
2109 'width': the_only_video.get('width'),
2110 'height': the_only_video.get('height'),
2111 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2112 'fps': the_only_video.get('fps'),
49a57e70 2113 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2114 'vcodec': the_only_video.get('vcodec'),
2115 'vbr': the_only_video.get('vbr'),
2116 'stretched_ratio': the_only_video.get('stretched_ratio'),
2117 })
2118
2119 if the_only_audio:
2120 new_dict.update({
2121 'acodec': the_only_audio.get('acodec'),
2122 'abr': the_only_audio.get('abr'),
975a0d0d 2123 'asr': the_only_audio.get('asr'),
f8d4ad9a 2124 })
2125
2126 return new_dict
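        # Descriptive note (not in the original): merging a video-only and an audio-only
        # format yields a synthetic dict whose 'format_id' is e.g. '137+140', whose 'ext'
        # follows merge_output_format (else the video ext, else 'mkv'), and whose
        # 'requested_formats' keeps the underlying formats for the downloader/merger.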
2127
e8e73840 2128 def _check_formats(formats):
981052c9 2129 if not check_formats:
2130 yield from formats
b5ac45b1 2131 return
9f1a1c36 2132 yield from self._check_formats(formats)
e8e73840 2133
67134eab 2134 def _build_selector_function(selector):
909d24dd 2135 if isinstance(selector, list): # ,
67134eab
JMF
2136 fs = [_build_selector_function(s) for s in selector]
2137
317f7ab6 2138 def selector_function(ctx):
67134eab 2139 for f in fs:
981052c9 2140 yield from f(ctx)
67134eab 2141 return selector_function
909d24dd 2142
2143 elif selector.type == GROUP: # ()
0130afb7 2144 selector_function = _build_selector_function(selector.selector)
909d24dd 2145
2146 elif selector.type == PICKFIRST: # /
67134eab
JMF
2147 fs = [_build_selector_function(s) for s in selector.selector]
2148
317f7ab6 2149 def selector_function(ctx):
67134eab 2150 for f in fs:
317f7ab6 2151 picked_formats = list(f(ctx))
67134eab
JMF
2152 if picked_formats:
2153 return picked_formats
2154 return []
67134eab 2155
981052c9 2156 elif selector.type == MERGE: # +
2157 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2158
2159 def selector_function(ctx):
adbc4ec4 2160 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2161 yield _merge(pair)
2162
909d24dd 2163 elif selector.type == SINGLE: # atom
598d185d 2164 format_spec = selector.selector or 'best'
909d24dd 2165
f8d4ad9a 2166 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2167 if format_spec == 'all':
2168 def selector_function(ctx):
9222c381 2169 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2170 elif format_spec == 'mergeall':
2171 def selector_function(ctx):
316f2650 2172 formats = list(_check_formats(
2173 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2174 if not formats:
2175 return
921b76ca 2176 merged_format = formats[-1]
2177 for f in formats[-2::-1]:
f8d4ad9a 2178 merged_format = _merge((merged_format, f))
2179 yield merged_format
909d24dd 2180
2181 else:
85e801a9 2182 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2183 mobj = re.match(
2184 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2185 format_spec)
2186 if mobj is not None:
2187 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2188 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2189 format_type = (mobj.group('type') or [None])[0]
2190 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2191 format_modified = mobj.group('mod') is not None
909d24dd 2192
2193 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2194 _filter_f = (
eff63539 2195 (lambda f: f.get('%scodec' % format_type) != 'none')
2196 if format_type and format_modified # bv*, ba*, wv*, wa*
2197 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2198 if format_type # bv, ba, wv, wa
2199 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2200 if not format_modified # b, w
8326b00a 2201 else lambda f: True) # b*, w*
2202 filter_f = lambda f: _filter_f(f) and (
2203 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2204 else:
48ee10ee 2205 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2206 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2207 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2208 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2209 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2210 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2211 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2212 else:
b5ae35ee 2213 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2214
2215 def selector_function(ctx):
2216 formats = list(ctx['formats'])
909d24dd 2217 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2218 if not matches:
2219 if format_fallback and ctx['incomplete_formats']:
2220 # for extractors with incomplete formats (audio only (soundcloud)
 2221 # or video only (imgur)) best/worst will fall back to
2222 # best/worst {video,audio}-only format
2223 matches = formats
2224 elif seperate_fallback and not ctx['has_merged_format']:
2225 # for compatibility with youtube-dl when there is no pre-merged format
2226 matches = list(filter(seperate_fallback, formats))
981052c9 2227 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2228 try:
e8e73840 2229 yield matches[format_idx - 1]
4abea8ca 2230 except LazyList.IndexError:
981052c9 2231 return
083c9df9 2232
67134eab 2233 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2234
317f7ab6 2235 def final_selector(ctx):
adbc4ec4 2236 ctx_copy = dict(ctx)
67134eab 2237 for _filter in filters:
317f7ab6
S
2238 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2239 return selector_function(ctx_copy)
67134eab 2240 return final_selector
083c9df9 2241
67134eab 2242 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2243 try:
f9934b96 2244 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2245 except tokenize.TokenError:
2246 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2247
86e5f3ed 2248 class TokenIterator:
0130afb7
JMF
2249 def __init__(self, tokens):
2250 self.tokens = tokens
2251 self.counter = 0
2252
2253 def __iter__(self):
2254 return self
2255
2256 def __next__(self):
2257 if self.counter >= len(self.tokens):
2258 raise StopIteration()
2259 value = self.tokens[self.counter]
2260 self.counter += 1
2261 return value
2262
2263 next = __next__
2264
2265 def restore_last_token(self):
2266 self.counter -= 1
2267
2268 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2269 return _build_selector_function(parsed_selector)
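    # Illustrative sketch (not part of the original file): format_spec strings the parser
    # above accepts, mirroring the documented --format syntax:
    #
    #   self.build_format_selector('best')                             # single atom
    #   self.build_format_selector('bv*+ba/b')                         # merge, then fallback
    #   self.build_format_selector('bestvideo[height<=720]+bestaudio') # merge with a filter
    #   self.build_format_selector('(mp4,webm)[height<480]')           # group with shared filter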
a9c58ad9 2270
e5660ee6 2271 def _calc_headers(self, info_dict):
8b7539d2 2272 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6
JMF
2273
2274 cookies = self._calc_cookies(info_dict)
2275 if cookies:
2276 res['Cookie'] = cookies
2277
0016b84e
S
2278 if 'X-Forwarded-For' not in res:
2279 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2280 if x_forwarded_for_ip:
2281 res['X-Forwarded-For'] = x_forwarded_for_ip
2282
e5660ee6
JMF
2283 return res
2284
2285 def _calc_cookies(self, info_dict):
5c2266df 2286 pr = sanitized_Request(info_dict['url'])
e5660ee6 2287 self.cookiejar.add_cookie_header(pr)
662435f7 2288 return pr.get_header('Cookie')
e5660ee6 2289
9f1a1c36 2290 def _sort_thumbnails(self, thumbnails):
2291 thumbnails.sort(key=lambda t: (
2292 t.get('preference') if t.get('preference') is not None else -1,
2293 t.get('width') if t.get('width') is not None else -1,
2294 t.get('height') if t.get('height') is not None else -1,
2295 t.get('id') if t.get('id') is not None else '',
2296 t.get('url')))
2297
b0249bca 2298 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2299 thumbnails = info_dict.get('thumbnails')
2300 if thumbnails is None:
2301 thumbnail = info_dict.get('thumbnail')
2302 if thumbnail:
2303 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2304 if not thumbnails:
2305 return
2306
2307 def check_thumbnails(thumbnails):
2308 for t in thumbnails:
2309 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2310 try:
2311 self.urlopen(HEADRequest(t['url']))
2312 except network_exceptions as err:
2313 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2314 continue
2315 yield t
2316
2317 self._sort_thumbnails(thumbnails)
2318 for i, t in enumerate(thumbnails):
2319 if t.get('id') is None:
2320 t['id'] = '%d' % i
2321 if t.get('width') and t.get('height'):
2322 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2323 t['url'] = sanitize_url(t['url'])
2324
2325 if self.params.get('check_formats') is True:
282f5709 2326 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2327 else:
2328 info_dict['thumbnails'] = thumbnails
bc516a3f 2329
03f83004
LNO
2330 def _fill_common_fields(self, info_dict, is_video=True):
2331 # TODO: move sanitization here
2332 if is_video:
2333 # playlists are allowed to lack "title"
2334 info_dict['fulltitle'] = info_dict.get('title')
2335 if 'title' not in info_dict:
2336 raise ExtractorError('Missing "title" field in extractor result',
2337 video_id=info_dict['id'], ie=info_dict['extractor'])
2338 elif not info_dict.get('title'):
2339 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2340 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2341
2342 if info_dict.get('duration') is not None:
2343 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2344
2345 for ts_key, date_key in (
2346 ('timestamp', 'upload_date'),
2347 ('release_timestamp', 'release_date'),
2348 ('modified_timestamp', 'modified_date'),
2349 ):
2350 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2351 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2352 # see http://bugs.python.org/issue1646728)
19a03940 2353 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2354 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2355 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2356
2357 live_keys = ('is_live', 'was_live')
2358 live_status = info_dict.get('live_status')
2359 if live_status is None:
2360 for key in live_keys:
2361 if info_dict.get(key) is False:
2362 continue
2363 if info_dict.get(key):
2364 live_status = key
2365 break
2366 if all(info_dict.get(key) is False for key in live_keys):
2367 live_status = 'not_live'
2368 if live_status:
2369 info_dict['live_status'] = live_status
2370 for key in live_keys:
2371 if info_dict.get(key) is None:
2372 info_dict[key] = (live_status == key)
2373
2374 # Auto generate title fields corresponding to the *_number fields when missing
2375 # in order to always have clean titles. This is very common for TV series.
2376 for field in ('chapter', 'season', 'episode'):
2377 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2378 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
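    # Descriptive note (not in the original): given e.g. {'episode_number': 3} and no
    # 'episode' field, the loop above sets 'episode' to 'Episode 3'; similarly a
    # 'timestamp' of 1640995200 produces 'upload_date' = '20220101' (UTC).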
2379
dd82ffea
JMF
2380 def process_video_result(self, info_dict, download=True):
2381 assert info_dict.get('_type', 'video') == 'video'
9c906919 2382 self._num_videos += 1
dd82ffea 2383
bec1fad2 2384 if 'id' not in info_dict:
fc08bdd6 2385 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2386 elif not info_dict.get('id'):
2387 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2388
c9969434
S
2389 def report_force_conversion(field, field_not, conversion):
2390 self.report_warning(
2391 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2392 % (field, field_not, conversion))
2393
2394 def sanitize_string_field(info, string_field):
2395 field = info.get(string_field)
2396 if field is None or isinstance(field, compat_str):
2397 return
2398 report_force_conversion(string_field, 'a string', 'string')
2399 info[string_field] = compat_str(field)
2400
2401 def sanitize_numeric_fields(info):
2402 for numeric_field in self._NUMERIC_FIELDS:
2403 field = info.get(numeric_field)
f9934b96 2404 if field is None or isinstance(field, (int, float)):
c9969434
S
2405 continue
2406 report_force_conversion(numeric_field, 'numeric', 'int')
2407 info[numeric_field] = int_or_none(field)
2408
2409 sanitize_string_field(info_dict, 'id')
2410 sanitize_numeric_fields(info_dict)
4c3f8c3f 2411 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2412 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2413
dd82ffea
JMF
2414 if 'playlist' not in info_dict:
2415 # It isn't part of a playlist
2416 info_dict['playlist'] = None
2417 info_dict['playlist_index'] = None
2418
bc516a3f 2419 self._sanitize_thumbnails(info_dict)
d5519808 2420
536a55da 2421 thumbnail = info_dict.get('thumbnail')
bc516a3f 2422 thumbnails = info_dict.get('thumbnails')
536a55da
S
2423 if thumbnail:
2424 info_dict['thumbnail'] = sanitize_url(thumbnail)
2425 elif thumbnails:
d5519808
PH
2426 info_dict['thumbnail'] = thumbnails[-1]['url']
2427
ae30b840 2428 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2429 info_dict['display_id'] = info_dict['id']
2430
03f83004 2431 self._fill_common_fields(info_dict)
33d2fc2f 2432
05108a49
S
2433 for cc_kind in ('subtitles', 'automatic_captions'):
2434 cc = info_dict.get(cc_kind)
2435 if cc:
2436 for _, subtitle in cc.items():
2437 for subtitle_format in subtitle:
2438 if subtitle_format.get('url'):
2439 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2440 if subtitle_format.get('ext') is None:
2441 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2442
2443 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2444 subtitles = info_dict.get('subtitles')
4bba3716 2445
360e1ca5 2446 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2447 info_dict['id'], subtitles, automatic_captions)
a504ced0 2448
dd82ffea
JMF
2449 if info_dict.get('formats') is None:
2450 # There's only one format available
2451 formats = [info_dict]
2452 else:
2453 formats = info_dict['formats']
2454
0a5a191a 2455 # or None ensures --clean-infojson removes it
2456 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2457 if not self.params.get('allow_unplayable_formats'):
2458 formats = [f for f in formats if not f.get('has_drm')]
0a5a191a 2459 if info_dict['_has_drm'] and all(
c0b6e5c7 2460 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2461 self.report_warning(
2462 'This video is DRM protected and only images are available for download. '
2463 'Use --list-formats to see them')
88acdbc2 2464
319b6059 2465 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2466 if not get_from_start:
2467 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2468 if info_dict.get('is_live') and formats:
adbc4ec4 2469 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2470 if get_from_start and not formats:
a44ca5a4 2471 self.raise_no_formats(info_dict, msg=(
2472 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2473 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2474
db95dc13 2475 if not formats:
1151c407 2476 self.raise_no_formats(info_dict)
db95dc13 2477
73af5cc8
S
2478 def is_wellformed(f):
2479 url = f.get('url')
a5ac0c47 2480 if not url:
73af5cc8
S
2481 self.report_warning(
2482 '"url" field is missing or empty - skipping format, '
2483 'there is an error in extractor')
a5ac0c47
S
2484 return False
2485 if isinstance(url, bytes):
2486 sanitize_string_field(f, 'url')
2487 return True
73af5cc8
S
2488
2489 # Filter out malformed formats for better extraction robustness
2490 formats = list(filter(is_wellformed, formats))
2491
181c7053
S
2492 formats_dict = {}
2493
dd82ffea 2494 # We check that all the formats have the format and format_id fields
db95dc13 2495 for i, format in enumerate(formats):
c9969434
S
2496 sanitize_string_field(format, 'format_id')
2497 sanitize_numeric_fields(format)
dcf77cf1 2498 format['url'] = sanitize_url(format['url'])
e74e3b63 2499 if not format.get('format_id'):
8016c922 2500 format['format_id'] = compat_str(i)
e2effb08
S
2501 else:
2502 # Sanitize format_id from characters used in format selector expression
ec85ded8 2503 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2504 format_id = format['format_id']
2505 if format_id not in formats_dict:
2506 formats_dict[format_id] = []
2507 formats_dict[format_id].append(format)
2508
2509 # Make sure all formats have unique format_id
03b4de72 2510 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2511 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2512 ambigious_id = len(ambiguous_formats) > 1
2513 for i, format in enumerate(ambiguous_formats):
2514 if ambigious_id:
181c7053 2515 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2516 if format.get('ext') is None:
2517 format['ext'] = determine_ext(format['url']).lower()
2518 # Ensure there is no conflict between id and ext in format selection
2519 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2520 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2521 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2522
2523 for i, format in enumerate(formats):
8c51aa65 2524 if format.get('format') is None:
6febd1c1 2525 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2526 id=format['format_id'],
2527 res=self.format_resolution(format),
b868936c 2528 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2529 )
6f0be937 2530 if format.get('protocol') is None:
b5559424 2531 format['protocol'] = determine_protocol(format)
239df021 2532 if format.get('resolution') is None:
2533 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2534 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2535 format['dynamic_range'] = 'SDR'
f2fe69c7 2536 if (info_dict.get('duration') and format.get('tbr')
2537 and not format.get('filesize') and not format.get('filesize_approx')):
2538 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2539
e5660ee6
JMF
2540 # Add HTTP headers, so that external programs can use them from the
2541 # json output
2542 full_format_info = info_dict.copy()
2543 full_format_info.update(format)
2544 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2545 # Remove private housekeeping stuff
2546 if '__x_forwarded_for_ip' in info_dict:
2547 del info_dict['__x_forwarded_for_ip']
dd82ffea 2548
9f1a1c36 2549 if self.params.get('check_formats') is True:
282f5709 2550 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2551
88acdbc2 2552 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
 2553 # only set the 'formats' field if the original info_dict lists them
2554 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2555 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2556 # which can't be exported to json
b3d9ef88 2557 info_dict['formats'] = formats
4ec82a72 2558
2559 info_dict, _ = self.pre_process(info_dict)
2560
6db9c4d5 2561 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2562 return info_dict
2563
2564 self.post_extract(info_dict)
2565 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2566
093a1710 2567 # The pre-processors may have modified the formats
2568 formats = info_dict.get('formats', [info_dict])
2569
fa9f30b8 2570 list_only = self.params.get('simulate') is None and (
2571 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2572 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2573 if self.params.get('list_thumbnails'):
2574 self.list_thumbnails(info_dict)
b7b04c78 2575 if self.params.get('listsubtitles'):
2576 if 'automatic_captions' in info_dict:
2577 self.list_subtitles(
2578 info_dict['id'], automatic_captions, 'automatic captions')
2579 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2580 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2581 self.list_formats(info_dict)
169dbde9 2582 if list_only:
b7b04c78 2583 # Without this printing, -F --print-json will not work
169dbde9 2584 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2585 return
2586
187986a8 2587 format_selector = self.format_selector
2588 if format_selector is None:
0017d9ad 2589 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2590 self.write_debug('Default format spec: %s' % req_format)
187986a8 2591 format_selector = self.build_format_selector(req_format)
317f7ab6 2592
fa9f30b8 2593 while True:
2594 if interactive_format_selection:
2595 req_format = input(
2596 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2597 try:
2598 format_selector = self.build_format_selector(req_format)
2599 except SyntaxError as err:
2600 self.report_error(err, tb=False, is_error=False)
2601 continue
2602
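# The selector gets the format list plus two hints: whether any single format
# already carries both audio and video, and whether the formats are one-sided
# (all video-only or all audio-only), in which case merging may be required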
85e801a9 2603 formats_to_download = list(format_selector({
fa9f30b8 2604 'formats': formats,
85e801a9 2605 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2606 'incomplete_formats': (
2607 # All formats are video-only or
2608 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2609 # all formats are audio-only
2610 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2611 }))
fa9f30b8 2612 if interactive_format_selection and not formats_to_download:
2613 self.report_error('Requested format is not available', tb=False, is_error=False)
2614 continue
2615 break
317f7ab6 2616
dd82ffea 2617 if not formats_to_download:
b7da73eb 2618 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2619 raise ExtractorError(
2620 'Requested format is not available. Use --list-formats for a list of available formats',
2621 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2622 self.report_warning('Requested format is not available')
2623 # Process what we can, even without any available formats.
2624 formats_to_download = [{}]
a13e6848 2625
b62fa6d7 2626 best_format = formats_to_download[-1]
2627 if download:
2628 if best_format:
2629 self.to_screen(
2630 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2631 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2632 max_downloads_reached = False
f46e2f9d 2633 for i, fmt in enumerate(formats_to_download):
09b49e1f 2634 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2635 new_info.update(fmt)
a13e6848 2636 try:
2637 self.process_info(new_info)
2638 except MaxDownloadsReached:
2639 max_downloads_reached = True
f46e2f9d 2640 # Remove copied info
2641 for key, val in tuple(new_info.items()):
2642 if info_dict.get(key) == val:
2643 new_info.pop(key)
a13e6848 2644 if max_downloads_reached:
2645 break
ebed8b37 2646
86e5f3ed 2647 write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
a13e6848 2648 assert write_archive.issubset({True, False, 'ignore'})
2649 if True in write_archive and False not in write_archive:
2650 self.record_download_archive(info_dict)
be72c624 2651
2652 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2653 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2654 if max_downloads_reached:
2655 raise MaxDownloadsReached()
ebed8b37 2656
49a57e70 2657 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2658 info_dict.update(best_format)
dd82ffea
JMF
2659 return info_dict
2660
98c70d6f 2661 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2662 """Select the requested subtitles and their format"""
d8a58ddc 2663 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2664 if normal_subtitles and self.params.get('writesubtitles'):
2665 available_subs.update(normal_subtitles)
d8a58ddc 2666 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2667 if automatic_captions and self.params.get('writeautomaticsub'):
2668 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2669 if lang not in available_subs:
2670 available_subs[lang] = cap_info
2671
4d171848
JMF
2672 if (not self.params.get('writesubtitles') and not
2673 self.params.get('writeautomaticsub') or not
2674 available_subs):
2675 return None
a504ced0 2676
d8a58ddc 2677 all_sub_langs = tuple(available_subs.keys())
a504ced0 2678 if self.params.get('allsubtitles', False):
c32b0aab 2679 requested_langs = all_sub_langs
2680 elif self.params.get('subtitleslangs', False):
77c4a9ef 2681 # A list is used so that the order of languages will be the same as
2682 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2683 requested_langs = []
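# Each --sub-langs entry is matched as a regex against the full language code
# ('$' is appended below); a leading '-' removes matching languages and 'all'
# selects (or, with '-all', clears) everything requested so far,
# e.g. --sub-langs 'all,-live_chat' keeps every language except live_chat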
2684 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2685 discard = lang_re[0] == '-'
c32b0aab 2686 if discard:
77c4a9ef 2687 lang_re = lang_re[1:]
3aa91540 2688 if lang_re == 'all':
2689 if discard:
2690 requested_langs = []
2691 else:
2692 requested_langs.extend(all_sub_langs)
2693 continue
77c4a9ef 2694 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2695 if discard:
2696 for lang in current_langs:
77c4a9ef 2697 while lang in requested_langs:
2698 requested_langs.remove(lang)
c32b0aab 2699 else:
77c4a9ef 2700 requested_langs.extend(current_langs)
2701 requested_langs = orderedSet(requested_langs)
d8a58ddc 2702 elif normal_sub_langs:
2703 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2704 else:
d8a58ddc 2705 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2706 if requested_langs:
2707 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2708
2709 formats_query = self.params.get('subtitlesformat', 'best')
2710 formats_preference = formats_query.split('/') if formats_query else []
2711 subs = {}
2712 for lang in requested_langs:
2713 formats = available_subs.get(lang)
2714 if formats is None:
86e5f3ed 2715 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2716 continue
a504ced0
JMF
2717 for ext in formats_preference:
2718 if ext == 'best':
2719 f = formats[-1]
2720 break
2721 matches = list(filter(lambda f: f['ext'] == ext, formats))
2722 if matches:
2723 f = matches[-1]
2724 break
2725 else:
2726 f = formats[-1]
2727 self.report_warning(
2728 'No subtitle format found matching "%s" for language %s, '
2729 'using %s' % (formats_query, lang, f['ext']))
2730 subs[lang] = f
2731 return subs
2732
bb66c247 2733 def _forceprint(self, key, info_dict):
2734 if info_dict is None:
2735 return
2736 info_copy = info_dict.copy()
2737 info_copy['formats_table'] = self.render_formats_table(info_dict)
2738 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2739 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2740 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2741
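# A trailing '=' in --print expands a field to 'name = <repr>', a bare field
# name becomes the template '%(name)s', and anything else is treated as a
# full output template, e.g. 'title=' -> 'title = %(title)r', 'title' -> '%(title)s'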
2742 def format_tmpl(tmpl):
2743 mobj = re.match(r'\w+(=?)$', tmpl)
2744 if mobj and mobj.group(1):
2745 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2746 elif mobj:
2747 return f'%({tmpl})s'
2748 return tmpl
8130779d 2749
bb66c247 2750 for tmpl in self.params['forceprint'].get(key, []):
2751 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2752
2753 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2754 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2755 tmpl = format_tmpl(tmpl)
2756 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2757 if self._ensure_dir_exists(filename):
86e5f3ed 2758 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2759 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2760
d06daf23 2761 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2762 def print_mandatory(field, actual_field=None):
2763 if actual_field is None:
2764 actual_field = field
d06daf23 2765 if (self.params.get('force%s' % field, False)
53c18592 2766 and (not incomplete or info_dict.get(actual_field) is not None)):
2767 self.to_stdout(info_dict[actual_field])
d06daf23
S
2768
2769 def print_optional(field):
2770 if (self.params.get('force%s' % field, False)
2771 and info_dict.get(field) is not None):
2772 self.to_stdout(info_dict[field])
2773
53c18592 2774 info_dict = info_dict.copy()
2775 if filename is not None:
2776 info_dict['filename'] = filename
2777 if info_dict.get('requested_formats') is not None:
2778 # For RTMP URLs, also include the playpath
2779 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2780 elif info_dict.get('url'):
53c18592 2781 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2782
bb66c247 2783 if (self.params.get('forcejson')
2784 or self.params['forceprint'].get('video')
2785 or self.params['print_to_file'].get('video')):
2b8a2973 2786 self.post_extract(info_dict)
bb66c247 2787 self._forceprint('video', info_dict)
53c18592 2788
d06daf23
S
2789 print_mandatory('title')
2790 print_mandatory('id')
53c18592 2791 print_mandatory('url', 'urls')
d06daf23
S
2792 print_optional('thumbnail')
2793 print_optional('description')
53c18592 2794 print_optional('filename')
b868936c 2795 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2796 self.to_stdout(formatSeconds(info_dict['duration']))
2797 print_mandatory('format')
53c18592 2798
2b8a2973 2799 if self.params.get('forcejson'):
6e84b215 2800 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2801
e8e73840 2802 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2803 if not info.get('url'):
1151c407 2804 self.raise_no_formats(info, True)
e8e73840 2805
2806 if test:
2807 verbose = self.params.get('verbose')
2808 params = {
2809 'test': True,
a169858f 2810 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2811 'verbose': verbose,
2812 'noprogress': not verbose,
2813 'nopart': True,
2814 'skip_unavailable_fragments': False,
2815 'keep_fragments': False,
2816 'overwrites': True,
2817 '_no_ytdl_file': True,
2818 }
2819 else:
2820 params = self.params
96fccc10 2821 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2822 if not test:
2823 for ph in self._progress_hooks:
2824 fd.add_progress_hook(ph)
42676437
M
2825 urls = '", "'.join(
2826 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2827 for f in info.get('requested_formats', []) or [info])
18e674b4 2828 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2829
adbc4ec4
THD
2830 # Note: Ideally info should be deep-copied so that hooks cannot modify it.
2831 # But it may contain objects that are not deep-copyable
2832 new_info = self._copy_infodict(info)
e8e73840 2833 if new_info.get('http_headers') is None:
2834 new_info['http_headers'] = self._calc_headers(new_info)
2835 return fd.download(name, new_info, subtitle)
2836
e04938ab 2837 def existing_file(self, filepaths, *, default_overwrite=True):
2838 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2839 if existing_files and not self.params.get('overwrites', default_overwrite):
2840 return existing_files[0]
2841
2842 for file in existing_files:
2843 self.report_file_delete(file)
2844 os.remove(file)
2845 return None
2846
8222d8de 2847 def process_info(self, info_dict):
09b49e1f 2848 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2849
2850 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2851 original_infodict = info_dict
fd288278 2852
4513a41a 2853 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2854 info_dict['format'] = info_dict['ext']
2855
09b49e1f 2856 # This is mostly just for backward compatibility of process_info
2857 # As a side-effect, this allows for format-specific filters
c77495e3 2858 if self._match_entry(info_dict) is not None:
9e907ebd 2859 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2860 return
2861
09b49e1f 2862 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2863 self.post_extract(info_dict)
0c14d66a 2864 self._num_downloads += 1
8222d8de 2865
dcf64d43 2866 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2867 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2868 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2869 files_to_move = {}
8222d8de
JMF
2870
2871 # Forced printings
4513a41a 2872 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2873
b7b04c78 2874 if self.params.get('simulate'):
9e907ebd 2875 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
8222d8de
JMF
2876 return
2877
de6000d9 2878 if full_filename is None:
8222d8de 2879 return
e92caff5 2880 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2881 return
e92caff5 2882 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2883 return
2884
80c03fa9 2885 if self._write_description('video', info_dict,
2886 self.prepare_filename(info_dict, 'description')) is None:
2887 return
2888
2889 sub_files = self._write_subtitles(info_dict, temp_filename)
2890 if sub_files is None:
2891 return
2892 files_to_move.update(dict(sub_files))
2893
2894 thumb_files = self._write_thumbnails(
2895 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2896 if thumb_files is None:
2897 return
2898 files_to_move.update(dict(thumb_files))
8222d8de 2899
80c03fa9 2900 infofn = self.prepare_filename(info_dict, 'infojson')
2901 _infojson_written = self._write_info_json('video', info_dict, infofn)
2902 if _infojson_written:
dac5df5a 2903 info_dict['infojson_filename'] = infofn
e75bb0d6 2904 # For backward compatibility, even though it was a private field
80c03fa9 2905 info_dict['__infojson_filename'] = infofn
2906 elif _infojson_written is None:
2907 return
2908
2909 # Note: Annotations are deprecated
2910 annofn = None
1fb07d10 2911 if self.params.get('writeannotations', False):
de6000d9 2912 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2913 if annofn:
e92caff5 2914 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2915 return
0c3d0f51 2916 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2917 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2918 elif not info_dict.get('annotations'):
2919 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2920 else:
2921 try:
6febd1c1 2922 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 2923 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
2924 annofile.write(info_dict['annotations'])
2925 except (KeyError, TypeError):
6febd1c1 2926 self.report_warning('There are no annotations to write.')
86e5f3ed 2927 except OSError:
6febd1c1 2928 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2929 return
1fb07d10 2930
732044af 2931 # Write internet shortcut files
08438d2c 2932 def _write_link_file(link_type):
60f3e995 2933 url = try_get(info_dict['webpage_url'], iri_to_uri)
2934 if not url:
2935 self.report_warning(
2936 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2937 return True
08438d2c 2938 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2939 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2940 return False
10e3742e 2941 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2942 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2943 return True
2944 try:
2945 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 2946 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2947 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2948 template_vars = {'url': url}
08438d2c 2949 if link_type == 'desktop':
2950 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2951 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 2952 except OSError:
08438d2c 2953 self.report_error(f'Cannot write internet shortcut {linkfn}')
2954 return False
732044af 2955 return True
2956
08438d2c 2957 write_links = {
2958 'url': self.params.get('writeurllink'),
2959 'webloc': self.params.get('writewebloclink'),
2960 'desktop': self.params.get('writedesktoplink'),
2961 }
2962 if self.params.get('writelink'):
2963 link_type = ('webloc' if sys.platform == 'darwin'
2964 else 'desktop' if sys.platform.startswith('linux')
2965 else 'url')
2966 write_links[link_type] = True
2967
2968 if any(should_write and not _write_link_file(link_type)
2969 for link_type, should_write in write_links.items()):
2970 return
732044af 2971
f46e2f9d 2972 def replace_info_dict(new_info):
2973 nonlocal info_dict
2974 if new_info == info_dict:
2975 return
2976 info_dict.clear()
2977 info_dict.update(new_info)
2978
56d868db 2979 try:
f46e2f9d 2980 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2981 replace_info_dict(new_info)
56d868db 2982 except PostProcessingError as err:
2983 self.report_error('Preprocessing: %s' % str(err))
2984 return
2985
a13e6848 2986 if self.params.get('skip_download'):
56d868db 2987 info_dict['filepath'] = temp_filename
2988 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2989 info_dict['__files_to_move'] = files_to_move
f46e2f9d 2990 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 2991 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 2992 else:
2993 # Download
b868936c 2994 info_dict.setdefault('__postprocessors', [])
4340deca 2995 try:
0202b52a 2996
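# Check for an already-downloaded file under both the post-conversion name
# (using final_ext, if any) and the original extension, preferring the
# converted file, and record which extension was actually found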
e04938ab 2997 def existing_video_file(*filepaths):
6b591b29 2998 ext = info_dict.get('ext')
e04938ab 2999 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3000 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3001 default_overwrite=False)
3002 if file:
3003 info_dict['ext'] = os.path.splitext(file)[1][1:]
3004 return file
0202b52a 3005
3006 success = True
4340deca 3007 if info_dict.get('requested_formats') is not None:
81cd954a
S
3008
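# Merging without remuxing is only assumed safe when all requested formats
# share a compatible container family (the mp4/m4a group, or webm);
# otherwise the code below falls back to an mkv container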
3009 def compatible_formats(formats):
d03cfdce 3010 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3011 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3012 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3013 if len(video_formats) > 2 or len(audio_formats) > 2:
3014 return False
3015
81cd954a 3016 # Check extension
86e5f3ed 3017 exts = {format.get('ext') for format in formats}
d03cfdce 3018 COMPATIBLE_EXTS = (
86e5f3ed 3019 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3020 {'webm'},
d03cfdce 3021 )
3022 for ext_sets in COMPATIBLE_EXTS:
3023 if ext_sets.issuperset(exts):
3024 return True
81cd954a
S
3025 # TODO: Check acodec/vcodec
3026 return False
3027
3028 requested_formats = info_dict['requested_formats']
0202b52a 3029 old_ext = info_dict['ext']
4e3b637d 3030 if self.params.get('merge_output_format') is None:
3031 if not compatible_formats(requested_formats):
3032 info_dict['ext'] = 'mkv'
3033 self.report_warning(
3034 'Requested formats are incompatible for merge and will be merged into mkv')
3035 if (info_dict['ext'] == 'webm'
3036 and info_dict.get('thumbnails')
3037 # check with type instead of pp_key, __name__, or isinstance
3038 # since we don't want any custom PPs to trigger this
3039 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3040 info_dict['ext'] = 'mkv'
3041 self.report_warning(
3042 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3043 new_ext = info_dict['ext']
0202b52a 3044
124bc071 3045 def correct_ext(filename, ext=new_ext):
96fccc10 3046 if filename == '-':
3047 return filename
0202b52a 3048 filename_real_ext = os.path.splitext(filename)[1][1:]
3049 filename_wo_ext = (
3050 os.path.splitext(filename)[0]
124bc071 3051 if filename_real_ext in (old_ext, new_ext)
0202b52a 3052 else filename)
86e5f3ed 3053 return f'{filename_wo_ext}.{ext}'
0202b52a 3054
38c6902b 3055 # Ensure filename always has a correct extension for successful merge
0202b52a 3056 full_filename = correct_ext(full_filename)
3057 temp_filename = correct_ext(temp_filename)
e04938ab 3058 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3059 info_dict['__real_download'] = False
18e674b4 3060
adbc4ec4
THD
3061 downloaded = []
3062 merger = FFmpegMergerPP(self)
3063
3064 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 3065 if dl_filename is not None:
6c7274ec 3066 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3067 elif fd:
3068 for f in requested_formats if fd != FFmpegFD else []:
3069 f['filepath'] = fname = prepend_extension(
3070 correct_ext(temp_filename, info_dict['ext']),
3071 'f%s' % f['format_id'], info_dict['ext'])
3072 downloaded.append(fname)
dbf5416a 3073 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3074 success, real_download = self.dl(temp_filename, info_dict)
3075 info_dict['__real_download'] = real_download
18e674b4 3076 else:
18e674b4 3077 if self.params.get('allow_unplayable_formats'):
3078 self.report_warning(
3079 'You have requested merging of multiple formats '
3080 'while also allowing unplayable formats to be downloaded. '
3081 'The formats won\'t be merged to prevent data corruption.')
3082 elif not merger.available:
e8969bda 3083 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3084 if not self.params.get('ignoreerrors'):
3085 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3086 return
3087 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3088
96fccc10 3089 if temp_filename == '-':
adbc4ec4 3090 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3091 else 'but the formats are incompatible for simultaneous download' if merger.available
3092 else 'but ffmpeg is not installed')
3093 self.report_warning(
3094 f'You have requested downloading multiple formats to stdout {reason}. '
3095 'The formats will be streamed one after the other')
3096 fname = temp_filename
dbf5416a 3097 for f in requested_formats:
3098 new_info = dict(info_dict)
3099 del new_info['requested_formats']
3100 new_info.update(f)
96fccc10 3101 if temp_filename != '-':
124bc071 3102 fname = prepend_extension(
3103 correct_ext(temp_filename, new_info['ext']),
3104 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3105 if not self._ensure_dir_exists(fname):
3106 return
a21e0ab1 3107 f['filepath'] = fname
96fccc10 3108 downloaded.append(fname)
dbf5416a 3109 partial_success, real_download = self.dl(fname, new_info)
3110 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3111 success = success and partial_success
adbc4ec4
THD
3112
3113 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3114 info_dict['__postprocessors'].append(merger)
3115 info_dict['__files_to_merge'] = downloaded
3116 # Even if there were no downloads, the merge itself only happens now
3117 info_dict['__real_download'] = True
3118 else:
3119 for file in downloaded:
3120 files_to_move[file] = None
4340deca
P
3121 else:
3122 # Just a single file
e04938ab 3123 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3124 if dl_filename is None or dl_filename == temp_filename:
3125 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3126 # So we should try to resume the download
e8e73840 3127 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3128 info_dict['__real_download'] = real_download
6c7274ec 3129 else:
3130 self.report_file_already_downloaded(dl_filename)
0202b52a 3131
0202b52a 3132 dl_filename = dl_filename or temp_filename
c571435f 3133 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3134
3158150c 3135 except network_exceptions as err:
7960b056 3136 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3137 return
86e5f3ed 3138 except OSError as err:
4340deca
P
3139 raise UnavailableVideoError(err)
3140 except (ContentTooShortError, ) as err:
86e5f3ed 3141 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3142 return
8222d8de 3143
de6000d9 3144 if success and full_filename != '-':
f17f8651 3145
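# --fixup policy: 'never'/'ignore' skip fixups, 'warn' only reports problems,
# 'force' always applies them, and the default 'detect_or_warn' applies them
# only when the file was actually downloaded in this session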
fd7cfb64 3146 def fixup():
3147 do_fixup = True
3148 fixup_policy = self.params.get('fixup')
3149 vid = info_dict['id']
3150
3151 if fixup_policy in ('ignore', 'never'):
3152 return
3153 elif fixup_policy == 'warn':
3fe75fdc 3154 do_fixup = 'warn'
f89b3e2d 3155 elif fixup_policy != 'force':
3156 assert fixup_policy in ('detect_or_warn', None)
3157 if not info_dict.get('__real_download'):
3158 do_fixup = False
fd7cfb64 3159
3160 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3161 if not (do_fixup and cndn):
fd7cfb64 3162 return
3fe75fdc 3163 elif do_fixup == 'warn':
fd7cfb64 3164 self.report_warning(f'{vid}: {msg}')
3165 return
3166 pp = cls(self)
3167 if pp.available:
3168 info_dict['__postprocessors'].append(pp)
3169 else:
3170 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3171
3172 stretched_ratio = info_dict.get('stretched_ratio')
3173 ffmpeg_fixup(
3174 stretched_ratio not in (1, None),
3175 f'Non-uniform pixel ratio {stretched_ratio}',
3176 FFmpegFixupStretchedPP)
3177
3178 ffmpeg_fixup(
3179 (info_dict.get('requested_formats') is None
3180 and info_dict.get('container') == 'm4a_dash'
3181 and info_dict.get('ext') == 'm4a'),
3182 'writing DASH m4a. Only some players support this container',
3183 FFmpegFixupM4aPP)
3184
993191c0 3185 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3186 downloader = downloader.__name__ if downloader else None
adbc4ec4
THD
3187
3188 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3189 ffmpeg_fixup(downloader == 'HlsFD',
3190 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3191 FFmpegFixupM3u8PP)
3192 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3193 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3194
e04b003e 3195 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3196 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3197
3198 fixup()
8222d8de 3199 try:
f46e2f9d 3200 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3201 except PostProcessingError as err:
3202 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3203 return
ab8e5e51
AM
3204 try:
3205 for ph in self._post_hooks:
23c1a667 3206 ph(info_dict['filepath'])
ab8e5e51
AM
3207 except Exception as err:
3208 self.report_error('post hooks: %s' % str(err))
3209 return
9e907ebd 3210 info_dict['__write_download_archive'] = True
2d30509f 3211
a13e6848 3212 if self.params.get('force_write_download_archive'):
9e907ebd 3213 info_dict['__write_download_archive'] = True
a13e6848 3214
3215 # Make sure the info_dict was modified in-place
f46e2f9d 3216 assert info_dict is original_infodict
a13e6848 3217
c3e6ffba 3218 max_downloads = self.params.get('max_downloads')
3219 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3220 raise MaxDownloadsReached()
8222d8de 3221
aa9369a2 3222 def __download_wrapper(self, func):
3223 @functools.wraps(func)
3224 def wrapper(*args, **kwargs):
3225 try:
3226 res = func(*args, **kwargs)
3227 except UnavailableVideoError as e:
3228 self.report_error(e)
b222c271 3229 except MaxDownloadsReached as e:
aa9369a2 3230 self.to_screen(f'[info] {e}')
3231 raise
b222c271 3232 except DownloadCancelled as e:
3233 self.to_screen(f'[info] {e}')
3234 if not self.params.get('break_per_url'):
3235 raise
aa9369a2 3236 else:
3237 if self.params.get('dump_single_json', False):
3238 self.post_extract(res)
3239 self.to_stdout(json.dumps(self.sanitize_info(res)))
3240 return wrapper
3241
8222d8de
JMF
3242 def download(self, url_list):
3243 """Download a given list of URLs."""
aa9369a2 3244 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3245 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3246 if (len(url_list) > 1
3247 and outtmpl != '-'
3248 and '%' not in outtmpl
3249 and self.params.get('max_downloads') != 1):
acd69589 3250 raise SameFileError(outtmpl)
8222d8de
JMF
3251
3252 for url in url_list:
aa9369a2 3253 self.__download_wrapper(self.extract_info)(
3254 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3255
3256 return self._download_retcode
3257
1dcc4c0c 3258 def download_with_info_file(self, info_filename):
31bd3925
JMF
3259 with contextlib.closing(fileinput.FileInput(
3260 [info_filename], mode='r',
3261 openhook=fileinput.hook_encoded('utf-8'))) as f:
3262 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3263 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3264 try:
aa9369a2 3265 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3266 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3267 if not isinstance(e, EntryNotInPlaylist):
3268 self.to_stderr('\r')
d4943898
JMF
3269 webpage_url = info.get('webpage_url')
3270 if webpage_url is not None:
aa9369a2 3271 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3272 return self.download([webpage_url])
3273 else:
3274 raise
3275 return self._download_retcode
1dcc4c0c 3276
cb202fd2 3277 @staticmethod
8012d892 3278 def sanitize_info(info_dict, remove_private_keys=False):
3279 ''' Sanitize the infodict for converting to json '''
3ad56b42 3280 if info_dict is None:
3281 return info_dict
6e84b215 3282 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3283 info_dict.setdefault('_type', 'video')
09b49e1f 3284
8012d892 3285 if remove_private_keys:
0a5a191a 3286 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3287 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3288 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3289 }
ae8f99e6 3290 else:
09b49e1f 3291 reject = lambda k, v: False
adbc4ec4
THD
3292
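# Recursively keep JSON-serializable values; anything json can't handle is
# replaced with its repr() so that dumping the info dict never fails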
3293 def filter_fn(obj):
3294 if isinstance(obj, dict):
3295 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3296 elif isinstance(obj, (list, tuple, set, LazyList)):
3297 return list(map(filter_fn, obj))
3298 elif obj is None or isinstance(obj, (str, int, float, bool)):
3299 return obj
3300 else:
3301 return repr(obj)
3302
5226731e 3303 return filter_fn(info_dict)
cb202fd2 3304
8012d892 3305 @staticmethod
3306 def filter_requested_info(info_dict, actually_filter=True):
3307 ''' Alias of sanitize_info for backward compatibility '''
3308 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3309
43d7f5a5 3310 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3311 for filename in set(filter(None, files_to_delete)):
3312 if msg:
3313 self.to_screen(msg % filename)
3314 try:
3315 os.remove(filename)
3316 except OSError:
3317 self.report_warning(f'Unable to delete file {filename}')
3318 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3319 del info['__files_to_move'][filename]
3320
ed5835b4 3321 @staticmethod
3322 def post_extract(info_dict):
3323 def actual_post_extract(info_dict):
3324 if info_dict.get('_type') in ('playlist', 'multi_video'):
3325 for video_dict in info_dict.get('entries', {}):
3326 actual_post_extract(video_dict or {})
3327 return
3328
09b49e1f 3329 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3330 info_dict.update(post_extractor())
ed5835b4 3331
3332 actual_post_extract(info_dict or {})
3333
dcf64d43 3334 def run_pp(self, pp, infodict):
5bfa4862 3335 files_to_delete = []
dcf64d43 3336 if '__files_to_move' not in infodict:
3337 infodict['__files_to_move'] = {}
b1940459 3338 try:
3339 files_to_delete, infodict = pp.run(infodict)
3340 except PostProcessingError as e:
3341 # Must be True and not 'only_download'
3342 if self.params.get('ignoreerrors') is True:
3343 self.report_error(e)
3344 return infodict
3345 raise
3346
5bfa4862 3347 if not files_to_delete:
dcf64d43 3348 return infodict
5bfa4862 3349 if self.params.get('keepvideo', False):
3350 for f in files_to_delete:
dcf64d43 3351 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3352 else:
43d7f5a5 3353 self._delete_downloaded_files(
3354 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3355 return infodict
5bfa4862 3356
ed5835b4 3357 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3358 self._forceprint(key, info)
ed5835b4 3359 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3360 info = self.run_pp(pp, info)
ed5835b4 3361 return info
277d6ff5 3362
56d868db 3363 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3364 info = dict(ie_info)
56d868db 3365 info['__files_to_move'] = files_to_move or {}
ed5835b4 3366 info = self.run_all_pps(key, info)
56d868db 3367 return info, info.pop('__files_to_move', None)
5bfa4862 3368
f46e2f9d 3369 def post_process(self, filename, info, files_to_move=None):
8222d8de 3370 """Run all the postprocessors on the given file."""
8222d8de 3371 info['filepath'] = filename
dcf64d43 3372 info['__files_to_move'] = files_to_move or {}
ed5835b4 3373 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3374 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3375 del info['__files_to_move']
ed5835b4 3376 return self.run_all_pps('after_move', info)
c1c9a79c 3377
5db07df6 3378 def _make_archive_id(self, info_dict):
e9fef7ee
S
3379 video_id = info_dict.get('id')
3380 if not video_id:
3381 return
5db07df6
PH
3382 # Future-proof against any change in case
3383 # and backwards compatibility with prior versions
e9fef7ee 3384 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3385 if extractor is None:
1211bb6d
S
3386 url = str_or_none(info_dict.get('url'))
3387 if not url:
3388 return
e9fef7ee 3389 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3390 for ie_key, ie in self._ies.items():
1211bb6d 3391 if ie.suitable(url):
8b7491c8 3392 extractor = ie_key
e9fef7ee
S
3393 break
3394 else:
3395 return
86e5f3ed 3396 return f'{extractor.lower()} {video_id}'
5db07df6
PH
3397
3398 def in_download_archive(self, info_dict):
3399 fn = self.params.get('download_archive')
3400 if fn is None:
3401 return False
3402
3403 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3404 if not vid_id:
7012b23c 3405 return False # Incomplete video information
5db07df6 3406
a45e8619 3407 return vid_id in self.archive
c1c9a79c
PH
3408
3409 def record_download_archive(self, info_dict):
3410 fn = self.params.get('download_archive')
3411 if fn is None:
3412 return
5db07df6
PH
3413 vid_id = self._make_archive_id(info_dict)
3414 assert vid_id
a13e6848 3415 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3416 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3417 archive_file.write(vid_id + '\n')
a45e8619 3418 self.archive.add(vid_id)
dd82ffea 3419
8c51aa65 3420 @staticmethod
8abeeb94 3421 def format_resolution(format, default='unknown'):
9359f3d4 3422 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3423 return 'audio only'
f49d89ee
PH
3424 if format.get('resolution') is not None:
3425 return format['resolution']
35615307 3426 if format.get('width') and format.get('height'):
ff51ed58 3427 return '%dx%d' % (format['width'], format['height'])
35615307 3428 elif format.get('height'):
ff51ed58 3429 return '%sp' % format['height']
35615307 3430 elif format.get('width'):
ff51ed58 3431 return '%dx?' % format['width']
3432 return default
8c51aa65 3433
8130779d 3434 def _list_format_headers(self, *headers):
3435 if self.params.get('listformats_table', True) is not False:
3436 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3437 return headers
3438
c57f7757
PH
3439 def _format_note(self, fdict):
3440 res = ''
3441 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3442 res += '(unsupported)'
32f90364
PH
3443 if fdict.get('language'):
3444 if res:
3445 res += ' '
f304da8a 3446 res += '[%s]' % fdict['language']
c57f7757 3447 if fdict.get('format_note') is not None:
f304da8a 3448 if res:
3449 res += ' '
3450 res += fdict['format_note']
c57f7757 3451 if fdict.get('tbr') is not None:
f304da8a 3452 if res:
3453 res += ', '
3454 res += '%4dk' % fdict['tbr']
c57f7757
PH
3455 if fdict.get('container') is not None:
3456 if res:
3457 res += ', '
3458 res += '%s container' % fdict['container']
3089bc74
S
3459 if (fdict.get('vcodec') is not None
3460 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3461 if res:
3462 res += ', '
3463 res += fdict['vcodec']
91c7271a 3464 if fdict.get('vbr') is not None:
c57f7757
PH
3465 res += '@'
3466 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3467 res += 'video@'
3468 if fdict.get('vbr') is not None:
3469 res += '%4dk' % fdict['vbr']
fbb21cf5 3470 if fdict.get('fps') is not None:
5d583bdf
S
3471 if res:
3472 res += ', '
3473 res += '%sfps' % fdict['fps']
c57f7757
PH
3474 if fdict.get('acodec') is not None:
3475 if res:
3476 res += ', '
3477 if fdict['acodec'] == 'none':
3478 res += 'video only'
3479 else:
3480 res += '%-5s' % fdict['acodec']
3481 elif fdict.get('abr') is not None:
3482 if res:
3483 res += ', '
3484 res += 'audio'
3485 if fdict.get('abr') is not None:
3486 res += '@%3dk' % fdict['abr']
3487 if fdict.get('asr') is not None:
3488 res += ' (%5dHz)' % fdict['asr']
3489 if fdict.get('filesize') is not None:
3490 if res:
3491 res += ', '
3492 res += format_bytes(fdict['filesize'])
9732d77e
PH
3493 elif fdict.get('filesize_approx') is not None:
3494 if res:
3495 res += ', '
3496 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3497 return res
91c7271a 3498
8130779d 3499 def render_formats_table(self, info_dict):
b69fd25c 3500 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3501 return None
b69fd25c 3502
94badb25 3503 formats = info_dict.get('formats', [info_dict])
8130779d 3504 if not self.params.get('listformats_table', True) is not False:
76d321f6 3505 table = [
3506 [
3507 format_field(f, 'format_id'),
3508 format_field(f, 'ext'),
3509 self.format_resolution(f),
8130779d 3510 self._format_note(f)
3511 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3512 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3513
3514 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3515 table = [
3516 [
3517 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3518 format_field(f, 'ext'),
3519 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3520 format_field(f, 'fps', '\t%d'),
3521 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3522 delim,
3523 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3524 format_field(f, 'tbr', '\t%dk'),
3525 shorten_protocol_name(f.get('protocol', '')),
3526 delim,
3527 format_field(f, 'vcodec', default='unknown').replace(
3528 'none', 'images' if f.get('acodec') == 'none'
3529 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3530 format_field(f, 'vbr', '\t%dk'),
3531 format_field(f, 'acodec', default='unknown').replace(
3532 'none', '' if f.get('vcodec') == 'none'
3533 else self._format_screen('video only', self.Styles.SUPPRESS)),
3534 format_field(f, 'abr', '\t%dk'),
3535 format_field(f, 'asr', '\t%dHz'),
3536 join_nonempty(
3537 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3538 format_field(f, 'language', '[%s]'),
3539 join_nonempty(format_field(f, 'format_note'),
3540 format_field(f, 'container', ignore=(None, f.get('ext'))),
3541 delim=', '),
3542 delim=' '),
3543 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3544 header_line = self._list_format_headers(
3545 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3546 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3547
3548 return render_table(
3549 header_line, table, hide_empty=True,
3550 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3551
3552 def render_thumbnails_table(self, info_dict):
88f23a18 3553 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3554 if not thumbnails:
8130779d 3555 return None
3556 return render_table(
ec11a9f4 3557 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3558 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3559
8130779d 3560 def render_subtitles_table(self, video_id, subtitles):
2412044c 3561 def _row(lang, formats):
49c258e1 3562 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3563 if len(set(names)) == 1:
7aee40c1 3564 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3565 return [lang, ', '.join(names), ', '.join(exts)]
3566
8130779d 3567 if not subtitles:
3568 return None
3569 return render_table(
ec11a9f4 3570 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3571 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3572 hide_empty=True)
3573
3574 def __list_table(self, video_id, name, func, *args):
3575 table = func(*args)
3576 if not table:
3577 self.to_screen(f'{video_id} has no {name}')
3578 return
3579 self.to_screen(f'[info] Available {name} for {video_id}:')
3580 self.to_stdout(table)
3581
3582 def list_formats(self, info_dict):
3583 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3584
3585 def list_thumbnails(self, info_dict):
3586 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3587
3588 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3589 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3590
dca08720
PH
3591 def urlopen(self, req):
3592 """ Start an HTTP download """
f9934b96 3593 if isinstance(req, str):
67dda517 3594 req = sanitized_Request(req)
19a41fc6 3595 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3596
3597 def print_debug_header(self):
3598 if not self.params.get('verbose'):
3599 return
49a57e70 3600
3601 def get_encoding(stream):
2a938746 3602 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3603 if not supports_terminal_sequences(stream):
59f943cd 3604 from .compat import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3605 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3606 return ret
3607
3608 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3609 locale.getpreferredencoding(),
3610 sys.getfilesystemencoding(),
cf4f42cb 3611 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
49a57e70 3612 self.get_encoding())
883d4b1e 3613
3614 logger = self.params.get('logger')
3615 if logger:
3616 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3617 write_debug(encoding_str)
3618 else:
96565c7e 3619 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3620 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3621
4c88ff87 3622 source = detect_variant()
36eaf303 3623 write_debug(join_nonempty(
3624 'yt-dlp version', __version__,
3625 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3626 '' if source == 'unknown' else f'({source})',
3627 delim=' '))
6e21fdd2 3628 if not _LAZY_LOADER:
3629 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3630 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3631 else:
49a57e70 3632 write_debug('Lazy loading extractors is disabled')
3ae5e797 3633 if plugin_extractors or plugin_postprocessors:
49a57e70 3634 write_debug('Plugins: %s' % [
3ae5e797 3635 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3636 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3637 if self.params.get('compat_opts'):
49a57e70 3638 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3639
3640 if source == 'source':
dca08720 3641 try:
36eaf303 3642 sp = Popen(
3643 ['git', 'rev-parse', '--short', 'HEAD'],
3644 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3645 cwd=os.path.dirname(os.path.abspath(__file__)))
3646 out, err = sp.communicate_or_kill()
3647 out = out.decode().strip()
3648 if re.match('[0-9a-f]+', out):
3649 write_debug('Git HEAD: %s' % out)
70a1165b 3650 except Exception:
19a03940 3651 with contextlib.suppress(Exception):
36eaf303 3652 sys.exc_clear()
b300cda4
S
3653
3654 def python_implementation():
3655 impl_name = platform.python_implementation()
3656 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3657 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3658 return impl_name
3659
49a57e70 3660 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3661 platform.python_version(),
3662 python_implementation(),
3663 platform.architecture()[0],
b300cda4 3664 platform_name()))
d28b5171 3665
8913ef74 3666 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3667 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3668 if ffmpeg_features:
19a03940 3669 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3670
4c83c967 3671 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3672 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3673 exe_str = ', '.join(
2831b468 3674 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3675 ) or 'none'
49a57e70 3676 write_debug('exe versions: %s' % exe_str)
dca08720 3677
1d485a1a 3678 from .compat.compat_utils import get_package_info
9b8ee23b 3679 from .dependencies import available_dependencies
3680
3681 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3682 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3683 })) or 'none'))
2831b468 3684
97ec5bc5 3685 self._setup_opener()
dca08720
PH
3686 proxy_map = {}
3687 for handler in self._opener.handlers:
3688 if hasattr(handler, 'proxies'):
3689 proxy_map.update(handler.proxies)
49a57e70 3690 write_debug(f'Proxy map: {proxy_map}')
dca08720 3691
49a57e70 3692 # Not implemented
3693 if False and self.params.get('call_home'):
58b1f00d 3694 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3695 write_debug('Public IP address: %s' % ipaddr)
58b1f00d
PH
3696 latest_version = self.urlopen(
3697 'https://yt-dl.org/latest/version').read().decode('utf-8')
3698 if version_tuple(latest_version) > version_tuple(__version__):
3699 self.report_warning(
3700 'You are using an outdated version (newest version: %s)! '
3701 'See https://yt-dl.org/update if you need help updating.' %
3702 latest_version)
3703
e344693b 3704 def _setup_opener(self):
97ec5bc5 3705 if hasattr(self, '_opener'):
3706 return
6ad14cab 3707 timeout_val = self.params.get('socket_timeout')
17bddf3e 3708 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3709
982ee69a 3710 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3711 opts_cookiefile = self.params.get('cookiefile')
3712 opts_proxy = self.params.get('proxy')
3713
982ee69a 3714 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3715
6a3f4c3f 3716 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3717 if opts_proxy is not None:
3718 if opts_proxy == '':
3719 proxies = {}
3720 else:
3721 proxies = {'http': opts_proxy, 'https': opts_proxy}
3722 else:
3723 proxies = compat_urllib_request.getproxies()
067aa17e 3724 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3725 if 'http' in proxies and 'https' not in proxies:
3726 proxies['https'] = proxies['http']
91410c9b 3727 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3728
3729 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3730 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3731 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3732 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3733 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3734
3735 # When passing our own FileHandler instance, build_opener won't add the
3736 # default FileHandler; this allows us to disable the file protocol, which
3737 # can be used for malicious purposes (see
067aa17e 3738 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3739 file_handler = compat_urllib_request.FileHandler()
3740
3741 def file_open(*args, **kwargs):
7a5c1cfe 3742 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3743 file_handler.file_open = file_open
3744
3745 opener = compat_urllib_request.build_opener(
fca6dba8 3746 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3747
dca08720
PH
3748 # Delete the default user-agent header, which would otherwise apply in
3749 # cases where our custom HTTP handler doesn't come into play
067aa17e 3750 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3751 opener.addheaders = []
3752 self._opener = opener
62fec3b2
PH
3753
3754 def encode(self, s):
3755 if isinstance(s, bytes):
3756 return s # Already encoded
3757
3758 try:
3759 return s.encode(self.get_encoding())
3760 except UnicodeEncodeError as err:
3761 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3762 raise
3763
3764 def get_encoding(self):
3765 encoding = self.params.get('encoding')
3766 if encoding is None:
3767 encoding = preferredencoding()
3768 return encoding
ec82d85a 3769
e08a85d8 3770 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3771 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
e08a85d8 3772 if overwrite is None:
3773 overwrite = self.params.get('overwrites', True)
80c03fa9 3774 if not self.params.get('writeinfojson'):
3775 return False
3776 elif not infofn:
3777 self.write_debug(f'Skipping writing {label} infojson')
3778 return False
3779 elif not self._ensure_dir_exists(infofn):
3780 return None
e08a85d8 3781 elif not overwrite and os.path.exists(infofn):
80c03fa9 3782 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3783 return 'exists'
3784
3785 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3786 try:
3787 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3788 return True
86e5f3ed 3789 except OSError:
cb96c5be 3790 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3791 return None
80c03fa9 3792
3793 def _write_description(self, label, ie_result, descfn):
3794 ''' Write description and return True = written, False = skip, None = error '''
3795 if not self.params.get('writedescription'):
3796 return False
3797 elif not descfn:
3798 self.write_debug(f'Skipping writing {label} description')
3799 return False
3800 elif not self._ensure_dir_exists(descfn):
3801 return None
3802 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3803 self.to_screen(f'[info] {label.title()} description is already present')
3804 elif ie_result.get('description') is None:
3805 self.report_warning(f'There\'s no {label} description to write')
3806 return False
3807 else:
3808 try:
3809 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3810 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3811 descfile.write(ie_result['description'])
86e5f3ed 3812 except OSError:
80c03fa9 3813 self.report_error(f'Cannot write {label} description file {descfn}')
3814 return None
3815 return True
3816
3817 def _write_subtitles(self, info_dict, filename):
3818 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3819 ret = []
3820 subtitles = info_dict.get('requested_subtitles')
3821 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3822 # subtitle download errors are already handled as troubles by the relevant IE,
3823 # so this silently continues when used with an IE that does not support subtitles
3824 return ret
3825
3826 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3827 if not sub_filename_base:
3828 self.to_screen('[info] Skipping writing video subtitles')
3829 return ret
3830 for sub_lang, sub_info in subtitles.items():
3831 sub_format = sub_info['ext']
3832 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3833 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3834 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3835 if existing_sub:
80c03fa9 3836 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3837 sub_info['filepath'] = existing_sub
3838 ret.append((existing_sub, sub_filename_final))
80c03fa9 3839 continue
3840
3841 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3842 if sub_info.get('data') is not None:
3843 try:
3844 # Use newline='' to prevent conversion of newline characters
3845 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3846 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3847 subfile.write(sub_info['data'])
3848 sub_info['filepath'] = sub_filename
3849 ret.append((sub_filename, sub_filename_final))
3850 continue
86e5f3ed 3851 except OSError:
80c03fa9 3852 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3853 return None
3854
3855 try:
3856 sub_copy = sub_info.copy()
3857 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3858 self.dl(sub_filename, sub_copy, subtitle=True)
3859 sub_info['filepath'] = sub_filename
3860 ret.append((sub_filename, sub_filename_final))
6020e05d 3861 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3862 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3863 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3864 if not self.params.get('ignoreerrors'):
3865 self.report_error(msg)
3866 raise DownloadError(msg)
3867 self.report_warning(msg)
519804a9 3868 return ret
80c03fa9 3869
3870 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3871 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3872 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3873 thumbnails, ret = [], []
6c4fd172 3874 if write_all or self.params.get('writethumbnail', False):
0202b52a 3875 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3876 multiple = write_all and len(thumbnails) > 1
ec82d85a 3877
80c03fa9 3878 if thumb_filename_base is None:
3879 thumb_filename_base = filename
3880 if thumbnails and not thumb_filename_base:
3881 self.write_debug(f'Skipping writing {label} thumbnail')
3882 return ret
3883
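# Iterate thumbnails from the end of the list (normally the highest-preference
# one) downwards, dropping any that fail to download; unless write_all is set,
# stop after the first thumbnail that is successfully written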
dd0228ce 3884 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3885 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3886 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3887 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3888 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3889
e04938ab 3890 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3891 if existing_thumb:
aa9369a2 3892 self.to_screen('[info] %s is already present' % (
3893 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3894 t['filepath'] = existing_thumb
3895 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3896 else:
80c03fa9 3897 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3898 try:
297e9952 3899 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3900 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3901 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3902 shutil.copyfileobj(uf, thumbf)
80c03fa9 3903 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3904 t['filepath'] = thumb_filename
3158150c 3905 except network_exceptions as err:
dd0228ce 3906 thumbnails.pop(idx)
80c03fa9 3907 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3908 if ret and not write_all:
3909 break
0202b52a 3910 return ret