yt_dlp/YoutubeDL.py
cc52de43 1#!/usr/bin/env python3
26e63931 2import collections
31bd3925 3import contextlib
9d2ecdbc 4import datetime
c1c9a79c 5import errno
31bd3925 6import fileinput
b5ae35ee 7import functools
8222d8de 8import io
b82f815f 9import itertools
8694c600 10import json
62fec3b2 11import locale
083c9df9 12import operator
8222d8de 13import os
dca08720 14import platform
f8271158 15import random
16import re
17import shutil
dca08720 18import subprocess
8222d8de 19import sys
21cd8fae 20import tempfile
8222d8de 21import time
67134eab 22import tokenize
8222d8de 23import traceback
524e2e4f 24import unicodedata
f9934b96 25import urllib.request
26from string import ascii_letters
27
f8271158 28from .cache import Cache
8c25f81b 29from .compat import (
003c69a8 30 compat_get_terminal_size,
e9c0cdd3 31 compat_os_name,
7d1eb38a 32 compat_shlex_quote,
33 compat_str,
34 compat_urllib_error,
35 compat_urllib_request,
819e0531 36 windows_enable_vt_mode,
8c25f81b 37)
982ee69a 38from .cookies import load_cookies
f8271158 39from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
40from .downloader.rtmp import rtmpdump_version
41from .extractor import _LAZY_LOADER
42from .extractor import _PLUGIN_CLASSES as plugin_extractors
43from .extractor import gen_extractor_classes, get_info_extractor
44from .extractor.openload import PhantomJSwrapper
45from .minicurses import format_text
46from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
47from .postprocessor import (
48 EmbedThumbnailPP,
49 FFmpegFixupDuplicateMoovPP,
50 FFmpegFixupDurationPP,
51 FFmpegFixupM3u8PP,
52 FFmpegFixupM4aPP,
53 FFmpegFixupStretchedPP,
54 FFmpegFixupTimestampPP,
55 FFmpegMergerPP,
56 FFmpegPostProcessor,
57 MoveFilesAfterDownloadPP,
58 get_postprocessor,
59)
60from .update import detect_variant
8c25f81b 61from .utils import (
f8271158 62 DEFAULT_OUTTMPL,
63 LINK_TEMPLATES,
64 NO_DEFAULT,
65 OUTTMPL_TYPES,
66 POSTPROCESS_WHEN,
67 STR_FORMAT_RE_TMPL,
68 STR_FORMAT_TYPES,
69 ContentTooShortError,
70 DateRange,
71 DownloadCancelled,
72 DownloadError,
73 EntryNotInPlaylist,
74 ExistingVideoReached,
75 ExtractorError,
76 GeoRestrictedError,
77 HEADRequest,
78 InAdvancePagedList,
79 ISO3166Utils,
80 LazyList,
81 MaxDownloadsReached,
19a03940 82 Namespace,
f8271158 83 PagedList,
84 PerRequestProxyHandler,
85 Popen,
86 PostProcessingError,
87 ReExtractInfo,
88 RejectedVideoReached,
89 SameFileError,
90 UnavailableVideoError,
91 YoutubeDLCookieProcessor,
92 YoutubeDLHandler,
93 YoutubeDLRedirectHandler,
94 age_restricted,
95 args_to_str,
ce02ed60 96 date_from_str,
ce02ed60 97 determine_ext,
b5559424 98 determine_protocol,
c0384f22 99 encode_compat_str,
ce02ed60 100 encodeFilename,
a06916d9 101 error_to_compat_str,
590bc6f6 102 expand_path,
90137ca4 103 filter_dict,
e29663c6 104 float_or_none,
02dbf93f 105 format_bytes,
e0fd9573 106 format_decimal_suffix,
f8271158 107 format_field,
525ef922 108 formatSeconds,
0bb322b9 109 get_domain,
c9969434 110 int_or_none,
732044af 111 iri_to_uri,
34921b43 112 join_nonempty,
ce02ed60 113 locked_file,
0202b52a 114 make_dir,
dca08720 115 make_HTTPS_handler,
8b7539d2 116 merge_headers,
3158150c 117 network_exceptions,
ec11a9f4 118 number_of_digits,
cd6fc19e 119 orderedSet,
083c9df9 120 parse_filesize,
dca08720 121 platform_name,
ce02ed60 122 preferredencoding,
eedb7ba5 123 prepend_extension,
51fb4995 124 register_socks_protocols,
3efb96a6 125 remove_terminal_sequences,
cfb56d1a 126 render_table,
eedb7ba5 127 replace_extension,
ce02ed60 128 sanitize_filename,
1bb5c511 129 sanitize_path,
dcf77cf1 130 sanitize_url,
67dda517 131 sanitized_Request,
e5660ee6 132 std_headers,
1211bb6d 133 str_or_none,
e29663c6 134 strftime_or_none,
ce02ed60 135 subtitles_filename,
819e0531 136 supports_terminal_sequences,
f2ebc5c7 137 timetuple_from_msec,
732044af 138 to_high_limit_path,
324ad820 139 traverse_obj,
6033d980 140 try_get,
29eb5174 141 url_basename,
7d1eb38a 142 variadic,
58b1f00d 143 version_tuple,
144 write_json_file,
145 write_string,
4f026faf 146)
f8271158 147from .version import RELEASE_GIT_HEAD, __version__
8222d8de 148
149if compat_os_name == 'nt':
150 import ctypes
151
2459b6e1 152
86e5f3ed 153class YoutubeDL:
154 """YoutubeDL class.
155
156 YoutubeDL objects are the ones responsible for downloading the
157 actual video file and writing it to disk if the user has requested
158 it, among some other tasks. In most cases there should be one per
159 program. Since, given a video URL, the downloader doesn't know how to
160 extract all the needed information (a task that InfoExtractors do), it
161 has to pass the URL to one of them.
162
163 For this, YoutubeDL objects have a method that allows
164 InfoExtractors to be registered in a given order. When it is passed
165 a URL, the YoutubeDL object hands it to the first InfoExtractor it
166 finds that reports being able to handle it. The InfoExtractor extracts
167 all the information about the video or videos the URL refers to, and
168 YoutubeDL processes the extracted information, possibly using a File
169 Downloader to download the video.
170
171 YoutubeDL objects accept a lot of parameters. In order not to saturate
172 the object constructor with arguments, it receives a dictionary of
173 options instead. These options are available through the params
174 attribute for the InfoExtractors to use. The YoutubeDL also
175 registers itself as the downloader in charge of the InfoExtractors
176 that are added to it, so this is a "mutual registration".
177
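    A minimal usage sketch (illustrative only; the option names used here are
    documented under "Available options" below, and the URL is just an example):

        import yt_dlp

        params = {
            'format': 'bestvideo*+bestaudio/best',
            'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
        }
        with yt_dlp.YoutubeDL(params) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
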
178 Available options:
179
180 username: Username for authentication purposes.
181 password: Password for authentication purposes.
180940e0 182 videopassword: Password for accessing a video.
183 ap_mso: Adobe Pass multiple-system operator identifier.
184 ap_username: Multiple-system operator account username.
185 ap_password: Multiple-system operator account password.
186 usenetrc: Use netrc for authentication instead.
187 verbose: Print additional info to stdout.
188 quiet: Do not print messages to stdout.
ad8915b7 189 no_warnings: Do not print out anything for warnings.
bb66c247 190 forceprint: A dict with keys WHEN mapped to a list of templates to
191 print to stdout. The allowed keys are video or any of the
192 items in utils.POSTPROCESS_WHEN.
ca30f449 193 For compatibility, a single list is also accepted
bb66c247 194 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
195 a list of tuples with (template, filename)
53c18592 196 forceurl: Force printing final URL. (Deprecated)
197 forcetitle: Force printing title. (Deprecated)
198 forceid: Force printing ID. (Deprecated)
199 forcethumbnail: Force printing thumbnail URL. (Deprecated)
200 forcedescription: Force printing description. (Deprecated)
201 forcefilename: Force printing final filename. (Deprecated)
202 forceduration: Force printing duration. (Deprecated)
8694c600 203 forcejson: Force printing info_dict as JSON.
204 dump_single_json: Force printing the info_dict of the whole playlist
205 (or video) as a single JSON line.
c25228e5 206 force_write_download_archive: Force writing download archive regardless
207 of 'skip_download' or 'simulate'.
b7b04c78 208 simulate: Do not download the video files. If unset (or None),
209 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 210 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 211 You can also pass a function. The function takes 'ctx' as
212 argument and returns the formats to download.
213 See "build_format_selector" for an implementation
63ad4d43 214 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 215 ignore_no_formats_error: Ignore "No video formats" error. Useful for
216 extracting metadata even if the video is not actually
217 available for download (experimental)
0930b11f 218 format_sort: A list of fields by which to sort the video formats.
219 See "Sorting Formats" for more details.
c25228e5 220 format_sort_force: Force the given format_sort. see "Sorting Formats"
221 for more details.
08d30158 222 prefer_free_formats: Whether to prefer video formats with free containers
223 over non-free ones of same quality.
c25228e5 224 allow_multiple_video_streams: Allow multiple video streams to be merged
225 into a single file
226 allow_multiple_audio_streams: Allow multiple audio streams to be merged
227 into a single file
0ba692ac 228 check_formats: Whether to test if the formats are downloadable.
9f1a1c36 229 Can be True (check all), False (check none),
230 'selected' (check selected formats),
0ba692ac 231 or None (check only if requested by extractor)
4524baf0 232 paths: Dictionary of output paths. The allowed keys are 'home'
233 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 234 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 235 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 236 For compatibility with youtube-dl, a single string can also be used
237 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
238 restrictfilenames: Do not allow "&" and spaces in file names
239 trim_file_name: Limit length of filename (extension excluded)
4524baf0 240 windowsfilenames: Force the filenames to be windows compatible
b1940459 241 ignoreerrors: Do not stop on download/postprocessing errors.
242 Can be 'only_download' to ignore only download errors.
243 Default is 'only_download' for CLI, but False for API
26e2805c 244 skip_playlist_after_errors: Number of allowed failures until the rest of
245 the playlist is skipped
d22dec74 246 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 247 overwrites: Overwrite all video and metadata files if True,
248 overwrite only non-video files if None
249 and don't overwrite any file if False
34488702 250 For compatibility with youtube-dl,
251 "nooverwrites" may also be used instead
252 playliststart: Playlist item to start at.
253 playlistend: Playlist item to end at.
c14e88f0 254 playlist_items: Specific indices of playlist to download.
ff815fe6 255 playlistreverse: Download playlist items in reverse order.
75822ca7 256 playlistrandom: Download playlist items in random order.
257 matchtitle: Download only matching titles.
258 rejecttitle: Reject downloads for matching titles.
8bf9319e 259 logger: Log messages to a logging.Logger instance.
8222d8de 260 logtostderr: Log messages to stderr instead of stdout.
819e0531 261 consoletitle: Display progress in console window's titlebar.
262 writedescription: Write the video description to a .description file
263 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 264 clean_infojson: Remove private fields from the infojson
34488702 265 getcomments: Extract video comments. This will not be written to disk
06167fbb 266 unless writeinfojson is also given
1fb07d10 267 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 268 writethumbnail: Write the thumbnail image to a file
c25228e5 269 allow_playlist_files: Whether to write playlists' description, infojson etc
270 also to disk when using the 'write*' options
ec82d85a 271 write_all_thumbnails: Write all thumbnail formats to files
732044af 272 writelink: Write an internet shortcut file, depending on the
273 current platform (.url/.webloc/.desktop)
274 writeurllink: Write a Windows internet shortcut file (.url)
275 writewebloclink: Write a macOS internet shortcut file (.webloc)
276 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 277 writesubtitles: Write the video subtitles to a file
741dd8ea 278 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 279 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 280 Downloads all the subtitles of the video
0b7f3118 281 (requires writesubtitles or writeautomaticsub)
8222d8de 282 listsubtitles: Lists all available subtitles for the video
a504ced0 283 subtitlesformat: The format code for subtitles
c32b0aab 284 subtitleslangs: List of languages of the subtitles to download (can be regex).
285 The list may contain "all" to refer to all the available
286 subtitles. The language can be prefixed with a "-" to
287 exclude it from the requested languages. Eg: ['all', '-live_chat']
288 keepvideo: Keep the video file after post-processing
289 daterange: A DateRange object, download only if the upload_date is in the range.
290 skip_download: Skip the actual download of the video file
c35f9e72 291 cachedir: Location of the cache files in the filesystem.
a0e07d31 292 False to disable filesystem cache.
47192f92 293 noplaylist: Download single video instead of a playlist if in doubt.
294 age_limit: An integer representing the user's age in years.
295 Unsuitable videos for the given age are skipped.
296 min_views: An integer representing the minimum view count the video
297 must have in order to not be skipped.
298 Videos without view count information are always
299 downloaded. None for no limit.
300 max_views: An integer representing the maximum view count.
301 Videos that are more popular than that are not
302 downloaded.
303 Videos without view count information are always
304 downloaded. None for no limit.
305 download_archive: File name of a file where all downloads are recorded.
306 Videos already present in the file are not downloaded
307 again.
8a51f564 308 break_on_existing: Stop the download process after attempting to download a
309 file that is in the archive.
310 break_on_reject: Stop the download process when encountering a video that
311 has been filtered out.
b222c271 312 break_per_url: Whether break_on_reject and break_on_existing
313 should act on each input URL as opposed to for the entire queue
8a51f564 314 cookiefile: File name where cookies should be read from and dumped to
315 cookiesfrombrowser: A tuple containing the name of the browser, the profile
316 name/path from where cookies are loaded, and the name of the
317 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
f81c62a6 318 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
319 support RFC 5746 secure renegotiation
f59f5ef8 320 nocheckcertificate: Do not verify SSL certificates
321 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
322 At the moment, this is only supported by YouTube.
8b7539d2 323 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 324 proxy: URL of the proxy server to use
38cce791 325 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 326 on geo-restricted sites.
e344693b 327 socket_timeout: Time to wait for unresponsive hosts, in seconds
328 bidi_workaround: Work around buggy terminals without bidirectional text
329 support, using fribidi
a0ddb8a2 330 debug_printtraffic:Print out sent and received HTTP traffic
91f071af 331 include_ads: Download ads as well (deprecated)
332 default_search: Prepend this string if an input url is not valid.
333 'auto' for elaborate guessing
62fec3b2 334 encoding: Use this encoding instead of the system-specified.
e8ee972c 335 extract_flat: Do not resolve URLs, return the immediate result.
336 Pass in 'in_playlist' to only show this behavior for
337 playlist items.
f2ebc5c7 338 wait_for_video: If given, wait for scheduled streams to become available.
339 The value should be a tuple containing the range
340 (min_secs, max_secs) to wait between retries
4f026faf 341 postprocessors: A list of dictionaries, each with an entry
71b640cc 342 * key: The name of the postprocessor. See
7a5c1cfe 343 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 344 * when: When to run the postprocessor. Allowed values are
345 the entries of utils.POSTPROCESS_WHEN
56d868db 346 Assumed to be 'post_process' if not given
b5ae35ee 347 post_hooks: Deprecated - Register a custom postprocessor instead
348 A list of functions that get called as the final step
349 for each video file, after all postprocessors have been
350 called. The filename will be passed as the only argument.
351 progress_hooks: A list of functions that get called on download
352 progress, with a dictionary with the entries
5cda4eda 353 * status: One of "downloading", "error", or "finished".
ee69b99a 354 Check this first and ignore unknown values.
3ba7740d 355 * info_dict: The extracted info_dict
71b640cc 356
5cda4eda 357 If status is one of "downloading", or "finished", the
358 following properties may also be present:
359 * filename: The final filename (always present)
5cda4eda 360 * tmpfilename: The filename we're currently writing to
361 * downloaded_bytes: Bytes on disk
362 * total_bytes: Size of the whole file, None if unknown
363 * total_bytes_estimate: Guess of the eventual file size,
364 None if unavailable.
365 * elapsed: The number of seconds since download started.
366 * eta: The estimated time in seconds, None if unknown
367 * speed: The download speed in bytes/second, None if
368 unknown
369 * fragment_index: The counter of the currently
370 downloaded video fragment.
371 * fragment_count: The number of fragments (= individual
372 files that will be merged)
373
374 Progress hooks are guaranteed to be called at least once
375 (with status "finished") if the download is successful.
819e0531 376 postprocessor_hooks: A list of functions that get called on postprocessing
377 progress, with a dictionary with the entries
378 * status: One of "started", "processing", or "finished".
379 Check this first and ignore unknown values.
380 * postprocessor: Name of the postprocessor
381 * info_dict: The extracted info_dict
382
383 Progress hooks are guaranteed to be called at least twice
384 (with status "started" and "finished") if the processing is successful.
45598f15 385 merge_output_format: Extension to use when merging formats.
6b591b29 386 final_ext: Expected final extension; used to detect when the file was
59a7a13e 387 already downloaded and converted
388 fixup: Automatically correct known faults of the file.
389 One of:
390 - "never": do nothing
391 - "warn": only emit a warning
392 - "detect_or_warn": check whether we can do anything
62cd676c 393 about it, warn otherwise (default)
504f20dd 394 source_address: Client-side IP address to bind to.
6ec6cb4e 395 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 396 yt-dlp servers for debugging. (BROKEN)
1cf376f5 397 sleep_interval_requests: Number of seconds to sleep between requests
398 during extraction
399 sleep_interval: Number of seconds to sleep before each download when
400 used alone or a lower bound of a range for randomized
401 sleep before each download (minimum possible number
402 of seconds to sleep) when used along with
403 max_sleep_interval.
404 max_sleep_interval:Upper bound of a range for randomized sleep before each
405 download (maximum possible number of seconds to sleep).
406 Must only be used along with sleep_interval.
407 Actual sleep time will be a random float from range
408 [sleep_interval; max_sleep_interval].
1cf376f5 409 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
410 listformats: Print an overview of available video formats and exit.
411 list_thumbnails: Print a table of all thumbnails and exit.
412 match_filter: A function that gets called with the info_dict of
413 every video.
414 If it returns a message, the video is ignored.
415 If it returns None, the video is downloaded.
492272fe 416 If it returns utils.NO_DEFAULT, the user is interactively
417 asked whether to download the video.
347de493 418 match_filter_func in utils.py is one example for this.
7e5db8c9 419 no_color: Do not emit color codes in output.
0a840f58 420 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 421 HTTP header
0a840f58 422 geo_bypass_country:
423 Two-letter ISO 3166-2 country code that will be used for
424 explicit geographic restriction bypassing via faking
504f20dd 425 X-Forwarded-For HTTP header
426 geo_bypass_ip_block:
427 IP range in CIDR notation that will be used similarly to
504f20dd 428 geo_bypass_country
71b640cc 429
85729c51 430 The following options determine which downloader is picked:
52a8a1e1 431 external_downloader: A dictionary of protocol keys and the executable of the
432 external downloader to use for it. The allowed protocols
433 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
434 Set the value to 'native' to use the native downloader
435 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
436 or {'m3u8': 'ffmpeg'} instead.
437 Use the native HLS downloader instead of ffmpeg/avconv
438 if True, otherwise use ffmpeg/avconv if False, otherwise
439 use downloader suggested by extractor if None.
53ed7066 440 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 441 The following options do not work when used through the API:
b5ae35ee 442 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 443 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 444 Refer __init__.py for their implementation
819e0531 445 progress_template: Dictionary of templates for progress outputs.
446 Allowed keys are 'download', 'postprocess',
447 'download-title' (console title) and 'postprocess-title'.
448 The template is mapped on a dictionary with keys 'progress' and 'info'
fe7e0c98 449
8222d8de 450 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 451 the downloader (see yt_dlp/downloader/common.py):
51d9739f 452 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
453 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
454 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 455 external_downloader_args, concurrent_fragment_downloads.
456
457 The following options are used by the post processors:
d4a24f40 458 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 459 otherwise prefer ffmpeg. (avconv support is deprecated)
460 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
461 to the binary or its containing directory.
43820c03 462 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 463 and a list of additional command-line arguments for the
464 postprocessor/executable. The dict can also have "PP+EXE" keys
465 which are used when the given exe is used by the given PP.
466 Use 'default' as the name for arguments to be passed to all PP
467 For compatibility with youtube-dl, a single list of args
468 can also be used
e409895f 469
470 The following options are used by the extractors:
62bff2c1 471 extractor_retries: Number of times to retry for known errors
472 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 473 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 474 discontinuities such as ad breaks (default: False)
5d3a0e79 475 extractor_args: A dictionary of arguments to be passed to the extractors.
476 See "EXTRACTOR ARGUMENTS" for details.
477 Eg: {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 478 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
5d3a0e79 479 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
480 If True (default), DASH manifests and related
62bff2c1 481 data will be downloaded and processed by extractor.
482 You can reduce network I/O by disabling it if you don't
483 care about DASH. (only for youtube)
5d3a0e79 484 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
485 If True (default), HLS manifests and related
62bff2c1 486 data will be downloaded and processed by extractor.
487 You can reduce network I/O by disabling it if you don't
488 care about HLS. (only for youtube)
489 """
490
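    # Illustrative sketch of how a caller might combine several of the options
    # documented in the docstring above. The hook and filter functions are
    # hypothetical; they rely only on the documented keys ('status', 'filename',
    # 'downloaded_bytes', 'total_bytes') and on the documented match_filter
    # return convention (None = download, a message = skip).
    #
    #     def my_hook(d):
    #         if d['status'] == 'finished':
    #             print('Downloaded', d['filename'])
    #
    #     def longer_than_a_minute(info_dict):
    #         if (info_dict.get('duration') or 0) < 60:
    #             return 'Video shorter than a minute, skipping'
    #
    #     ydl = YoutubeDL({
    #         'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
    #         'outtmpl': {'default': '%(uploader)s/%(title)s.%(ext)s'},
    #         'progress_hooks': [my_hook],
    #         'match_filter': longer_than_a_minute,
    #     })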
86e5f3ed 491 _NUMERIC_FIELDS = {
c9969434 492 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 493 'timestamp', 'release_timestamp',
494 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
495 'average_rating', 'comment_count', 'age_limit',
496 'start_time', 'end_time',
497 'chapter_number', 'season_number', 'episode_number',
498 'track_number', 'disc_number', 'release_year',
86e5f3ed 499 }
c9969434 500
6db9c4d5 501 _format_fields = {
502 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 503 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
6db9c4d5 504 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
505 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
506 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
507 'preference', 'language', 'language_preference', 'quality', 'source_preference',
508 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
509 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
510 }
48ee10ee 511 _format_selection_exts = {
512 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
513 'video': {'mp4', 'flv', 'webm', '3gp'},
514 'storyboards': {'mhtml'},
515 }
516
3511266b 517 def __init__(self, params=None, auto_init=True):
883d4b1e 518 """Create a FileDownloader object with the given options.
519 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 520 Set to 'no_verbose_header' to not print the header
883d4b1e 521 """
522 if params is None:
523 params = {}
592b7485 524 self.params = params
8b7491c8 525 self._ies = {}
56c73665 526 self._ies_instances = {}
1e43a6f7 527 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 528 self._printed_messages = set()
1cf376f5 529 self._first_webpage_request = True
ab8e5e51 530 self._post_hooks = []
933605d7 531 self._progress_hooks = []
819e0531 532 self._postprocessor_hooks = []
533 self._download_retcode = 0
534 self._num_downloads = 0
9c906919 535 self._num_videos = 0
592b7485 536 self._playlist_level = 0
537 self._playlist_urls = set()
a0e07d31 538 self.cache = Cache(self)
34308b30 539
819e0531 540 windows_enable_vt_mode()
cf4f42cb 541 self._out_files = {
542 'error': sys.stderr,
543 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
544 'console': None if compat_os_name == 'nt' else next(
545 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
546 }
547 self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
ec11a9f4 548 self._allow_colors = {
cf4f42cb 549 type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
550 for type_ in ('screen', 'error')
ec11a9f4 551 }
819e0531 552
a61f4b28 553 if sys.version_info < (3, 6):
554 self.report_warning(
0181adef 555 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 556
88acdbc2 557 if self.params.get('allow_unplayable_formats'):
558 self.report_warning(
ec11a9f4 559 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 560 'This is a developer option intended for debugging. \n'
561 ' If you experience any issues while using this option, '
ec11a9f4 562 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 563
564 def check_deprecated(param, option, suggestion):
565 if self.params.get(param) is not None:
86e5f3ed 566 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
567 return True
568 return False
569
570 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
571 if self.params.get('geo_verification_proxy') is None:
572 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
573
0d1bb027 574 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
575 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 576 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 577
49a57e70 578 for msg in self.params.get('_warnings', []):
0d1bb027 579 self.report_warning(msg)
ee8dd27a 580 for msg in self.params.get('_deprecation_warnings', []):
581 self.deprecation_warning(msg)
0d1bb027 582
ec11a9f4 583 if 'list-formats' in self.params.get('compat_opts', []):
584 self.params['listformats_table'] = False
585
b5ae35ee 586 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 587 # nooverwrites was unnecessarily changed to overwrites
588 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
589 # This ensures compatibility with both keys
590 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 591 elif self.params.get('overwrites') is None:
592 self.params.pop('overwrites', None)
b868936c 593 else:
594 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 595
455a15e2 596 self.params.setdefault('forceprint', {})
597 self.params.setdefault('print_to_file', {})
bb66c247 598
599 # Compatibility with older syntax
ca30f449 600 if not isinstance(params['forceprint'], dict):
455a15e2 601 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 602
455a15e2 603 if self.params.get('bidi_workaround', False):
604 try:
605 import pty
606 master, slave = pty.openpty()
003c69a8 607 width = compat_get_terminal_size().columns
608 if width is None:
609 width_args = []
610 else:
611 width_args = ['-w', str(width)]
5d681e96 612 sp_kwargs = dict(
613 stdin=subprocess.PIPE,
614 stdout=slave,
cf4f42cb 615 stderr=self._out_files['error'])
5d681e96 616 try:
d3c93ec2 617 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
5d681e96 618 except OSError:
d3c93ec2 619 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
5d681e96 620 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 621 except OSError as ose:
66e7ace1 622 if ose.errno == errno.ENOENT:
49a57e70 623 self.report_warning(
624 'Could not find fribidi executable, ignoring --bidi-workaround. '
625 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
626 else:
627 raise
0783b09b 628
97ec5bc5 629 if auto_init:
630 if auto_init != 'no_verbose_header':
631 self.print_debug_header()
632 self.add_default_info_extractors()
633
634 if (sys.platform != 'win32'
635 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 636 and not self.params.get('restrictfilenames', False)):
e9137224 637 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 638 self.report_warning(
6febd1c1 639 'Assuming --restrict-filenames since file system encoding '
1b725173 640 'cannot encode all characters. '
6febd1c1 641 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 642 self.params['restrictfilenames'] = True
34308b30 643
de6000d9 644 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 645
187986a8 646 # Creating format selector here allows us to catch syntax errors before the extraction
647 self.format_selector = (
fa9f30b8 648 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 649 else self.params['format'] if callable(self.params['format'])
187986a8 650 else self.build_format_selector(self.params['format']))
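        # Illustrative sketch: 'format' may also be a callable that receives the
        # selection context and yields the formats to download. The function
        # below is hypothetical and simply picks the last (best-sorted) format:
        #
        #     def pick_best(ctx):
        #         if ctx['formats']:
        #             yield ctx['formats'][-1]
        #
        #     ydl = YoutubeDL({'format': pick_best})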
651
8b7539d2 652 # Set http_headers defaults according to std_headers
653 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
654
013b50b7 655 hooks = {
656 'post_hooks': self.add_post_hook,
657 'progress_hooks': self.add_progress_hook,
658 'postprocessor_hooks': self.add_postprocessor_hook,
659 }
660 for opt, fn in hooks.items():
661 for ph in self.params.get(opt, []):
662 fn(ph)
71b640cc 663
5bfc8bee 664 for pp_def_raw in self.params.get('postprocessors', []):
665 pp_def = dict(pp_def_raw)
666 when = pp_def.pop('when', 'post_process')
667 self.add_post_processor(
f9934b96 668 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 669 when=when)
670
97ec5bc5 671 self._setup_opener()
672 register_socks_protocols()
673
ed39cac5 674 def preload_download_archive(fn):
675 """Preload the archive, if any is specified"""
676 if fn is None:
677 return False
49a57e70 678 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 679 try:
680 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
681 for line in archive_file:
682 self.archive.add(line.strip())
86e5f3ed 683 except OSError as ioe:
ed39cac5 684 if ioe.errno != errno.ENOENT:
685 raise
686 return False
687 return True
688
689 self.archive = set()
690 preload_download_archive(self.params.get('download_archive'))
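        # Each archive line is stored verbatim (stripped); with the default
        # writer these are "<extractor> <video id>" pairs, e.g. a line such as
        # "youtube BaW_jenozKc" (example only; the writing side is not shown here).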
691
692 def warn_if_short_id(self, argv):
693 # short YouTube ID starting with dash?
694 idxs = [
695 i for i, a in enumerate(argv)
696 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
697 if idxs:
698 correct_argv = (
7a5c1cfe 699 ['yt-dlp']
700 + [a for i, a in enumerate(argv) if i not in idxs]
701 + ['--'] + [argv[i] for i in idxs]
702 )
703 self.report_warning(
704 'Long argument string detected. '
49a57e70 705 'Use -- to separate parameters and URLs, like this:\n%s' %
706 args_to_str(correct_argv))
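        # Illustrative example: for argv like ['-f', 'best', '-abcde12345']
        # (where '-abcde12345' is a made-up 11-character video ID starting
        # with '-'), the suggested invocation becomes
        #     yt-dlp -f best -- -abcde12345
        # so the ID is not mistaken for an option.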
707
708 def add_info_extractor(self, ie):
709 """Add an InfoExtractor object to the end of the list."""
8b7491c8 710 ie_key = ie.ie_key()
711 self._ies[ie_key] = ie
e52d7f85 712 if not isinstance(ie, type):
8b7491c8 713 self._ies_instances[ie_key] = ie
e52d7f85 714 ie.set_downloader(self)
8222d8de 715
8b7491c8 716 def _get_info_extractor_class(self, ie_key):
717 ie = self._ies.get(ie_key)
718 if ie is None:
719 ie = get_info_extractor(ie_key)
720 self.add_info_extractor(ie)
721 return ie
722
723 def get_info_extractor(self, ie_key):
724 """
725 Get an instance of an IE with name ie_key, it will try to get one from
726 the _ies list, if there's no instance it will create a new one and add
727 it to the extractor list.
728 """
729 ie = self._ies_instances.get(ie_key)
730 if ie is None:
731 ie = get_info_extractor(ie_key)()
732 self.add_info_extractor(ie)
733 return ie
734
735 def add_default_info_extractors(self):
736 """
737 Add the InfoExtractors returned by gen_extractors to the end of the list
738 """
e52d7f85 739 for ie in gen_extractor_classes():
740 self.add_info_extractor(ie)
741
56d868db 742 def add_post_processor(self, pp, when='post_process'):
8222d8de 743 """Add a PostProcessor object to the end of the chain."""
5bfa4862 744 self._pps[when].append(pp)
745 pp.set_downloader(self)
746
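    # Illustrative sketch of registering a custom postprocessor via this API.
    # The class below is hypothetical; a real one subclasses PostProcessor and
    # returns a (files_to_delete, info) tuple from run():
    #
    #     from yt_dlp.postprocessor.common import PostProcessor
    #
    #     class NotifyPP(PostProcessor):
    #         def run(self, info):
    #             self.to_screen('Finished processing %s' % info.get('title'))
    #             return [], info
    #
    #     ydl.add_post_processor(NotifyPP(), when='post_process')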
747 def add_post_hook(self, ph):
748 """Add the post hook"""
749 self._post_hooks.append(ph)
750
933605d7 751 def add_progress_hook(self, ph):
819e0531 752 """Add the download progress hook"""
933605d7 753 self._progress_hooks.append(ph)
8ab470f1 754
819e0531 755 def add_postprocessor_hook(self, ph):
756 """Add the postprocessing progress hook"""
757 self._postprocessor_hooks.append(ph)
5bfc8bee 758 for pps in self._pps.values():
759 for pp in pps:
760 pp.add_progress_hook(ph)
819e0531 761
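    # Illustrative sketch of a download progress hook, using only the keys
    # documented in the class docstring; unknown statuses are ignored as advised:
    #
    #     def on_progress(d):
    #         if d['status'] == 'downloading':
    #             total = d.get('total_bytes') or d.get('total_bytes_estimate')
    #             if total:
    #                 print('%.1f%% of %s' % (100 * d['downloaded_bytes'] / total, d['filename']))
    #         elif d['status'] == 'finished':
    #             print('Done downloading', d['filename'])
    #
    #     ydl.add_progress_hook(on_progress)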
1c088fa8 762 def _bidi_workaround(self, message):
5d681e96 763 if not hasattr(self, '_output_channel'):
764 return message
765
5d681e96 766 assert hasattr(self, '_output_process')
11b85ce6 767 assert isinstance(message, compat_str)
768 line_count = message.count('\n') + 1
769 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 770 self._output_process.stdin.flush()
6febd1c1 771 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 772 for _ in range(line_count))
6febd1c1 773 return res[:-len('\n')]
1c088fa8 774
b35496d8 775 def _write_string(self, message, out=None, only_once=False):
776 if only_once:
777 if message in self._printed_messages:
778 return
779 self._printed_messages.add(message)
780 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 781
cf4f42cb 782 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 783 """Print message to stdout"""
cf4f42cb 784 if quiet is not None:
ae6a1b95 785 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
cf4f42cb 786 self._write_string(
787 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
788 self._out_files['print'])
789
790 def to_screen(self, message, skip_eol=False, quiet=None):
791 """Print message to screen if not in quiet mode"""
8bf9319e 792 if self.params.get('logger'):
43afe285 793 self.params['logger'].debug(message)
cf4f42cb 794 return
795 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
796 return
797 self._write_string(
798 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
799 self._out_files['screen'])
8222d8de 800
b35496d8 801 def to_stderr(self, message, only_once=False):
0760b0a7 802 """Print message to stderr"""
11b85ce6 803 assert isinstance(message, compat_str)
8bf9319e 804 if self.params.get('logger'):
805 self.params['logger'].error(message)
806 else:
cf4f42cb 807 self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
808
809 def _send_console_code(self, code):
810 if compat_os_name == 'nt' or not self._out_files['console']:
811 return
812 self._write_string(code, self._out_files['console'])
8222d8de 813
814 def to_console_title(self, message):
815 if not self.params.get('consoletitle', False):
816 return
3efb96a6 817 message = remove_terminal_sequences(message)
818 if compat_os_name == 'nt':
819 if ctypes.windll.kernel32.GetConsoleWindow():
820 # c_wchar_p() might not be necessary if `message` is
821 # already of type unicode()
822 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 823 else:
824 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 825
bdde425c 826 def save_console_title(self):
cf4f42cb 827 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 828 return
592b7485 829 self._send_console_code('\033[22;0t') # Save the title on stack
830
831 def restore_console_title(self):
cf4f42cb 832 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 833 return
592b7485 834 self._send_console_code('\033[23;0t') # Restore the title from stack
835
836 def __enter__(self):
837 self.save_console_title()
838 return self
839
840 def __exit__(self, *args):
841 self.restore_console_title()
f89197d7 842
dca08720 843 if self.params.get('cookiefile') is not None:
1bab3437 844 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 845
fa9f30b8 846 def trouble(self, message=None, tb=None, is_error=True):
847 """Determine action to take when a download problem appears.
848
849 Depending on if the downloader has been configured to ignore
850 download errors or not, this method may throw an exception or
851 not when errors are found, after printing the message.
852
fa9f30b8 853 @param tb If given, is additional traceback information
854 @param is_error Whether to raise error according to ignoreerrors
855 """
856 if message is not None:
857 self.to_stderr(message)
858 if self.params.get('verbose'):
859 if tb is None:
860 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 861 tb = ''
8222d8de 862 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 863 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 864 tb += encode_compat_str(traceback.format_exc())
865 else:
866 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 867 tb = ''.join(tb_data)
c19bc311 868 if tb:
869 self.to_stderr(tb)
fa9f30b8 870 if not is_error:
871 return
b1940459 872 if not self.params.get('ignoreerrors'):
873 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
874 exc_info = sys.exc_info()[1].exc_info
875 else:
876 exc_info = sys.exc_info()
877 raise DownloadError(message, exc_info)
878 self._download_retcode = 1
879
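    # Illustrative example of the behaviour described above: when 'ignoreerrors'
    # is not set, errors reported through trouble()/report_error() surface as
    # DownloadError, which API callers typically catch (URL is a placeholder):
    #
    #     try:
    #         ydl.download(['https://example.com/unavailable-video'])
    #     except DownloadError as err:
    #         print('Download failed:', err)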
19a03940 880 Styles = Namespace(
881 HEADERS='yellow',
882 EMPHASIS='light blue',
492272fe 883 FILENAME='green',
19a03940 884 ID='green',
885 DELIM='blue',
886 ERROR='red',
887 WARNING='yellow',
888 SUPPRESS='light black',
889 )
ec11a9f4 890
7578d77d 891 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 892 text = str(text)
ec11a9f4 893 if test_encoding:
894 original_text = text
5c104538 895 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
896 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 897 text = text.encode(encoding, 'ignore').decode(encoding)
898 if fallback is not None and text != original_text:
899 text = fallback
7578d77d 900 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 901
902 def _format_screen(self, *args, **kwargs):
7578d77d 903 return self._format_text(
cf4f42cb 904 self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
ec11a9f4 905
906 def _format_err(self, *args, **kwargs):
7578d77d 907 return self._format_text(
cf4f42cb 908 self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
819e0531 909
c84aeac6 910 def report_warning(self, message, only_once=False):
911 '''
912 Print the message to stderr, it will be prefixed with 'WARNING:'
913 If stderr is a tty file the 'WARNING:' will be colored
914 '''
915 if self.params.get('logger') is not None:
916 self.params['logger'].warning(message)
8222d8de 917 else:
918 if self.params.get('no_warnings'):
919 return
ec11a9f4 920 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 921
ee8dd27a 922 def deprecation_warning(self, message):
923 if self.params.get('logger') is not None:
a44ca5a4 924 self.params['logger'].warning(f'DeprecationWarning: {message}')
ee8dd27a 925 else:
926 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
927
fa9f30b8 928 def report_error(self, message, *args, **kwargs):
929 '''
930 Do the same as trouble, but prefixes the message with 'ERROR:', colored
931 in red if stderr is a tty file.
932 '''
fa9f30b8 933 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 934
b35496d8 935 def write_debug(self, message, only_once=False):
0760b0a7 936 '''Log debug message or print message to stderr'''
937 if not self.params.get('verbose', False):
938 return
939 message = '[debug] %s' % message
940 if self.params.get('logger'):
941 self.params['logger'].debug(message)
942 else:
b35496d8 943 self.to_stderr(message, only_once)
0760b0a7 944
945 def report_file_already_downloaded(self, file_name):
946 """Report file has already been fully downloaded."""
947 try:
6febd1c1 948 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 949 except UnicodeEncodeError:
6febd1c1 950 self.to_screen('[download] The file has already been downloaded')
8222d8de 951
0c3d0f51 952 def report_file_delete(self, file_name):
953 """Report that existing file will be deleted."""
954 try:
c25228e5 955 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 956 except UnicodeEncodeError:
c25228e5 957 self.to_screen('Deleting existing file')
0c3d0f51 958
319b6059 959 def raise_no_formats(self, info, forced=False, *, msg=None):
0a5a191a 960 has_drm = info.get('_has_drm')
319b6059 961 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
962 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
963 if forced or not ignored:
1151c407 964 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 965 expected=has_drm or ignored or expected)
88acdbc2 966 else:
967 self.report_warning(msg)
968
de6000d9 969 def parse_outtmpl(self):
970 outtmpl_dict = self.params.get('outtmpl', {})
971 if not isinstance(outtmpl_dict, dict):
972 outtmpl_dict = {'default': outtmpl_dict}
71ce444a 973 # Remove spaces in the default template
974 if self.params.get('restrictfilenames'):
975 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
976 else:
977 sanitize = lambda x: x
de6000d9 978 outtmpl_dict.update({
71ce444a 979 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 980 if outtmpl_dict.get(k) is None})
86e5f3ed 981 for _, val in outtmpl_dict.items():
de6000d9 982 if isinstance(val, bytes):
86e5f3ed 983 self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
de6000d9 984 return outtmpl_dict
985
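    # Illustrative example: 'outtmpl' may be a single template string (legacy
    # youtube-dl style) or a dict keyed by output type, with missing keys
    # falling back to DEFAULT_OUTTMPL as merged above:
    #
    #     YoutubeDL({'outtmpl': '%(title)s.%(ext)s'})
    #     YoutubeDL({'outtmpl': {'default': '%(title)s.%(ext)s',
    #                            'infojson': '%(id)s.info.json'}})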
21cd8fae 986 def get_output_path(self, dir_type='', filename=None):
987 paths = self.params.get('paths', {})
988 assert isinstance(paths, dict)
989 path = os.path.join(
990 expand_path(paths.get('home', '').strip()),
991 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
992 filename or '')
21cd8fae 993 return sanitize_path(path, force=self.params.get('windowsfilenames'))
994
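    # Illustrative example: with params {'paths': {'home': '~/Videos', 'temp': 'tmp'}},
    # get_output_path('temp', 'abc.mp4') joins the expanded home directory, the
    # 'temp' subpath and the filename before applying sanitize_path().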
76a264ac 995 @staticmethod
901130bb 996 def _outtmpl_expandpath(outtmpl):
997 # expand_path translates '%%' into '%' and '$$' into '$'
998 # correspondingly that is not what we want since we need to keep
999 # '%%' intact for template dict substitution step. Working around
1000 # with boundary-alike separator hack.
1001 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
86e5f3ed 1002 outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
901130bb 1003
1004 # outtmpl should be expand_path'ed before template dict substitution
1005 # because meta fields may contain env variables we don't want to
1006 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1007 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1008 return expand_path(outtmpl).replace(sep, '')
1009
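    # Illustrative example of the workaround above: in a template like
    # '~/%(title)s.%(ext)s' the '~' is expanded to the home directory, while any
    # literal '%%' or '$$' survives untouched for the later '%' substitution
    # step instead of being collapsed by expand_path().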
1010 @staticmethod
1011 def escape_outtmpl(outtmpl):
1012 ''' Escape any remaining strings like %s, %abc% etc. '''
1013 return re.sub(
1014 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1015 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1016 outtmpl)
1017
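    # Illustrative example: escape_outtmpl('100% of %(title)s') should yield
    # '100%% of %(title)s' -- stray '%' signs are doubled so that only real
    # template fields are substituted later.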
1018 @classmethod
1019 def validate_outtmpl(cls, outtmpl):
76a264ac 1020 ''' @return None or Exception object '''
7d1eb38a 1021 outtmpl = re.sub(
37893bb0 1022 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
7d1eb38a 1023 lambda mobj: f'{mobj.group(0)[:-1]}s',
1024 cls._outtmpl_expandpath(outtmpl))
76a264ac 1025 try:
7d1eb38a 1026 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1027 return None
1028 except ValueError as err:
1029 return err
1030
03b4de72 1031 @staticmethod
1032 def _copy_infodict(info_dict):
1033 info_dict = dict(info_dict)
09b49e1f 1034 info_dict.pop('__postprocessors', None)
03b4de72 1035 return info_dict
1036
e0fd9573 1037 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1038 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1039 @param sanitize Whether to sanitize the output as a filename.
1040 For backward compatibility, a function can also be passed
1041 """
1042
6e84b215 1043 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1044
03b4de72 1045 info_dict = self._copy_infodict(info_dict)
752cda38 1046 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1047 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1048 if info_dict.get('duration', None) is not None
1049 else None)
752cda38 1050 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
9c906919 1051 info_dict['video_autonumber'] = self._num_videos
752cda38 1052 if info_dict.get('resolution') is None:
1053 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1054
e6f21b3d 1055 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1056 # of %(field)s to %(field)0Nd for backward compatibility
1057 field_size_compat_map = {
0a5a191a 1058 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
ec11a9f4 1059 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1060 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1061 }
752cda38 1062
385a27fa 1063 TMPL_DICT = {}
37893bb0 1064 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
385a27fa 1065 MATH_FUNCTIONS = {
1066 '+': float.__add__,
1067 '-': float.__sub__,
1068 }
e625be0d 1069 # Field is of the form key1.key2...
1070 # where keys (except first) can be string, int or slice
2b8a2973 1071 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
e0fd9573 1072 MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
385a27fa 1073 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 1074 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1075 (?P<negate>-)?
385a27fa 1076 (?P<fields>{field})
1077 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 1078 (?:>(?P<strf_format>.+?))?
34baa9fd 1079 (?P<remaining>
1080 (?P<alternate>(?<!\\),[^|&)]+)?
1081 (?:&(?P<replacement>.*?))?
1082 (?:\|(?P<default>.*?))?
1083 )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
752cda38 1084
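        # Illustrative examples of the field syntax parsed above (all standard
        # output-template constructs):
        #   %(title)s                    plain field
        #   %(tags.0)s                   object traversal (key1.key2...)
        #   %(duration>%H-%M-%S)s        strftime-style formatting
        #   %(epoch-3600>%H-%M-%S)s      arithmetic before formatting
        #   %(title,id)s                 alternate field if the first is empty
        #   %(view_count|NA)s            literal default value
        #   %(title&Downloading %s)s     replacement string used when the field is set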
2b8a2973 1085 def _traverse_infodict(k):
1086 k = k.split('.')
1087 if k[0] == '':
1088 k.pop(0)
1089 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1090
752cda38 1091 def get_value(mdict):
1092 # Object traversal
2b8a2973 1093 value = _traverse_infodict(mdict['fields'])
752cda38 1094 # Negative
1095 if mdict['negate']:
1096 value = float_or_none(value)
1097 if value is not None:
1098 value *= -1
1099 # Do maths
385a27fa 1100 offset_key = mdict['maths']
1101 if offset_key:
752cda38 1102 value = float_or_none(value)
1103 operator = None
385a27fa 1104 while offset_key:
1105 item = re.match(
1106 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1107 offset_key).group(0)
1108 offset_key = offset_key[len(item):]
1109 if operator is None:
752cda38 1110 operator = MATH_FUNCTIONS[item]
385a27fa 1111 continue
1112 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1113 offset = float_or_none(item)
1114 if offset is None:
2b8a2973 1115 offset = float_or_none(_traverse_infodict(item))
385a27fa 1116 try:
1117 value = operator(value, multiplier * offset)
1118 except (TypeError, ZeroDivisionError):
1119 return None
1120 operator = None
752cda38 1121 # Datetime formatting
1122 if mdict['strf_format']:
7c37ff97 1123 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1124
1125 return value
1126
b868936c 1127 na = self.params.get('outtmpl_na_placeholder', 'NA')
1128
e0fd9573 1129 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1130 return sanitize_filename(str(value), restricted=restricted, is_id=(
1131 bool(re.search(r'(^|[_.])id(\.|$)', key))
1132 if 'filename-sanitization' in self.params.get('compat_opts', [])
1133 else NO_DEFAULT))
e0fd9573 1134
1135 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1136 sanitize = bool(sanitize)
1137
6e84b215 1138 def _dumpjson_default(obj):
1139 if isinstance(obj, (set, LazyList)):
1140 return list(obj)
adbc4ec4 1141 return repr(obj)
6e84b215 1142
752cda38 1143 def create_key(outer_mobj):
1144 if not outer_mobj.group('has_key'):
b836dc94 1145 return outer_mobj.group(0)
752cda38 1146 key = outer_mobj.group('key')
752cda38 1147 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1148 initial_field = mobj.group('fields') if mobj else ''
e978789f 1149 value, replacement, default = None, None, na
7c37ff97 1150 while mobj:
e625be0d 1151 mobj = mobj.groupdict()
7c37ff97 1152 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1153 value = get_value(mobj)
e978789f 1154 replacement = mobj['replacement']
7c37ff97 1155 if value is None and mobj['alternate']:
34baa9fd 1156 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1157 else:
1158 break
752cda38 1159
b868936c 1160 fmt = outer_mobj.group('format')
752cda38 1161 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
86e5f3ed 1162 fmt = f'0{field_size_compat_map[key]:d}d'
752cda38 1163
e978789f 1164 value = default if value is None else value if replacement is None else replacement
752cda38 1165
4476d2c7 1166 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1167 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1168 if fmt[-1] == 'l': # list
4476d2c7 1169 delim = '\n' if '#' in flags else ', '
9e907ebd 1170 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1171 elif fmt[-1] == 'j': # json
4476d2c7 1172 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1173 elif fmt[-1] == 'q': # quoted
4476d2c7 1174 value = map(str, variadic(value) if '#' in flags else [value])
1175 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1176 elif fmt[-1] == 'B': # bytes
86e5f3ed 1177 value = f'%{str_fmt}'.encode() % str(value).encode('utf-8')
f5aa5cfb 1178 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1179 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1180 value, fmt = unicodedata.normalize(
1181 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1182 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1183 value), str_fmt
e0fd9573 1184 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1185 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1186 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1187 factor=1024 if '#' in flags else 1000)
37893bb0 1188 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1189 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1190 elif fmt[-1] == 'c':
524e2e4f 1191 if value:
1192 value = str(value)[0]
76a264ac 1193 else:
524e2e4f 1194 fmt = str_fmt
76a264ac 1195 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1196 value = float_or_none(value)
752cda38 1197 if value is None:
1198 value, fmt = default, 's'
901130bb 1199
752cda38 1200 if sanitize:
1201 if fmt[-1] == 'r':
1202 # If value is an object, sanitize might convert it to a string
1203 # So we convert it to repr first
7d1eb38a 1204 value, fmt = repr(value), str_fmt
639f1cea 1205 if fmt[-1] in 'csr':
e0fd9573 1206 value = sanitizer(initial_field, value)
901130bb 1207
b868936c 1208 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1209 TMPL_DICT[key] = value
b868936c 1210 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1211
385a27fa 1212 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1213
819e0531 1214 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1215 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1216 return self.escape_outtmpl(outtmpl) % info_dict
1217
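    # Illustrative example: with info_dict = {'title': 'Test', 'id': 'abc', 'ext': 'mp4'}
    # (values assumed for illustration), evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
    # info_dict) produces 'Test [abc].mp4'.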
5127e92a 1218 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1219 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1220 if outtmpl is None:
1221 outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
8222d8de 1222 try:
5127e92a 1223 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1224 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1225 if not filename:
1226 return None
15da37c7 1227
5127e92a 1228 if tmpl_type in ('', 'temp'):
6a0546e3 1229 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1230 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1231 filename = replace_extension(filename, ext, final_ext)
5127e92a 1232 elif tmpl_type:
6a0546e3 1233 force_ext = OUTTMPL_TYPES[tmpl_type]
1234 if force_ext:
1235 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1236
bdc3fd2f
U
1237 # https://github.com/blackjack4494/youtube-dlc/issues/85
1238 trim_file_name = self.params.get('trim_file_name', False)
1239 if trim_file_name:
5c22c63d 1240 no_ext, *ext = filename.rsplit('.', 2)
1241 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1242
0202b52a 1243 return filename
8222d8de 1244 except ValueError as err:
6febd1c1 1245 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1246 return None
1247
5127e92a 1248 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1249 """Generate the output filename"""
1250 if outtmpl:
1251 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1252 dir_type = None
1253 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1254 if not filename and dir_type not in ('', 'temp'):
1255 return ''
de6000d9 1256
c84aeac6 1257 if warn:
21cd8fae 1258 if not self.params.get('paths'):
de6000d9 1259 pass
1260 elif filename == '-':
c84aeac6 1261                 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1262 elif os.path.isabs(filename):
c84aeac6 1263 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1264 if filename == '-' or not filename:
1265 return filename
1266
21cd8fae 1267 return self.get_output_path(dir_type, filename)
0202b52a 1268
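    # Added commentary (illustrative, not part of the original source): with
    # hypothetical options, prepare_filename() evaluates the template and then
    # joins the result with the directory configured for dir_type via
    # get_output_path():
    #
    #   ydl = YoutubeDL({'outtmpl': {'default': '%(title)s.%(ext)s'},
    #                    'paths': {'home': '/downloads', 'temp': '/tmp/yt'}})
    #   ydl.prepare_filename({'id': 'abc123', 'title': 'Example', 'ext': 'mp4'})
    #   # -> roughly '/downloads/Example.mp4'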
120fe513 1269 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1270 """ Returns None if the file should be downloaded """
8222d8de 1271
c77495e3 1272 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1273
8b0d7497 1274 def check_filter():
8b0d7497 1275 if 'title' in info_dict:
1276 # This can happen when we're just evaluating the playlist
1277 title = info_dict['title']
1278 matchtitle = self.params.get('matchtitle', False)
1279 if matchtitle:
1280 if not re.search(matchtitle, title, re.IGNORECASE):
1281 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1282 rejecttitle = self.params.get('rejecttitle', False)
1283 if rejecttitle:
1284 if re.search(rejecttitle, title, re.IGNORECASE):
1285 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1286 date = info_dict.get('upload_date')
1287 if date is not None:
1288 dateRange = self.params.get('daterange', DateRange())
1289 if date not in dateRange:
86e5f3ed 1290 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1291 view_count = info_dict.get('view_count')
1292 if view_count is not None:
1293 min_views = self.params.get('min_views')
1294 if min_views is not None and view_count < min_views:
1295 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1296 max_views = self.params.get('max_views')
1297 if max_views is not None and view_count > max_views:
1298 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1299 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1300 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1301
8f18aca8 1302 match_filter = self.params.get('match_filter')
1303 if match_filter is not None:
1304 try:
1305 ret = match_filter(info_dict, incomplete=incomplete)
1306 except TypeError:
1307 # For backward compatibility
1308 ret = None if incomplete else match_filter(info_dict)
492272fe 1309 if ret is NO_DEFAULT:
1310 while True:
1311 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1312 reply = input(self._format_screen(
1313 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1314 if reply in {'y', ''}:
1315 return None
1316 elif reply == 'n':
1317 return f'Skipping {video_title}'
1318 return True
1319 elif ret is not None:
8f18aca8 1320 return ret
8b0d7497 1321 return None
1322
c77495e3 1323 if self.in_download_archive(info_dict):
1324 reason = '%s has already been recorded in the archive' % video_title
1325 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1326 else:
1327 reason = check_filter()
1328 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1329 if reason is not None:
120fe513 1330 if not silent:
1331 self.to_screen('[download] ' + reason)
c77495e3 1332 if self.params.get(break_opt, False):
1333 raise break_err()
8b0d7497 1334 return reason
fe7e0c98 1335
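    # Added commentary (illustrative, not part of the original source): a
    # 'match_filter' callable such as the one built for --match-filter can be
    # supplied directly; the filter expression below is hypothetical:
    #
    #   from yt_dlp.utils import match_filter_func
    #   params = {'match_filter': match_filter_func('duration > 60 & !is_live')}
    #
    # _match_entry() then returns the skip reason as a string, or None when the
    # entry should be downloaded; NO_DEFAULT from the filter triggers the
    # interactive prompt above.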
b6c45014
JMF
1336 @staticmethod
1337 def add_extra_info(info_dict, extra_info):
1338 '''Set the keys from extra_info in info dict if they are missing'''
1339 for key, value in extra_info.items():
1340 info_dict.setdefault(key, value)
1341
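    # Added commentary: add_extra_info() is additive only; existing keys win.
    # E.g. add_extra_info({'id': 'x'}, {'id': 'y', 'extractor': 'Foo'}) keeps
    # 'id' == 'x' and only adds 'extractor'.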
409e1828 1342 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1343 process=True, force_generic_extractor=False):
41d1cca3 1344 """
1345 Return a list with a dictionary for each video extracted.
1346
1347 Arguments:
1348 url -- URL to extract
1349
1350 Keyword arguments:
1351 download -- whether to download videos during extraction
1352 ie_key -- extractor key hint
1353 extra_info -- dictionary containing the extra values to add to each result
1354 process -- whether to resolve all unresolved references (URLs, playlist items),
1355 must be True for download to work.
1356 force_generic_extractor -- force using the generic extractor
1357 """
fe7e0c98 1358
409e1828 1359 if extra_info is None:
1360 extra_info = {}
1361
61aa5ba3 1362 if not ie_key and force_generic_extractor:
d22dec74
S
1363 ie_key = 'Generic'
1364
8222d8de 1365 if ie_key:
8b7491c8 1366 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1367 else:
1368 ies = self._ies
1369
8b7491c8 1370 for ie_key, ie in ies.items():
8222d8de
JMF
1371 if not ie.suitable(url):
1372 continue
1373
1374 if not ie.working():
6febd1c1
PH
1375 self.report_warning('The program functionality for this site has been marked as broken, '
1376 'and will probably not work.')
8222d8de 1377
1151c407 1378 temp_id = ie.get_temp_id(url)
a0566bbf 1379 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1380 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1381 if self.params.get('break_on_existing', False):
1382 raise ExistingVideoReached()
a0566bbf 1383 break
8b7491c8 1384 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1385 else:
1386 self.report_error('no suitable InfoExtractor for URL %s' % url)
1387
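    # Added commentary (illustrative, not part of the original source): typical
    # API usage of the entry point above, with a hypothetical URL:
    #
    #   with YoutubeDL({'skip_download': True}) as ydl:
    #       info = ydl.extract_info('https://example.com/watch?v=xyz',
    #                               download=False)
    #
    # Passing ie_key (e.g. 'Youtube') bypasses the suitable() search and forces
    # that extractor.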
8e5fecc8 1388 def __handle_extraction_exceptions(func):
b5ae35ee 1389 @functools.wraps(func)
a0566bbf 1390 def wrapper(self, *args, **kwargs):
6da22e7d 1391 while True:
1392 try:
1393 return func(self, *args, **kwargs)
1394 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1395 raise
6da22e7d 1396 except ReExtractInfo as e:
1397 if e.expected:
1398 self.to_screen(f'{e}; Re-extracting data')
1399 else:
1400 self.to_stderr('\r')
1401 self.report_warning(f'{e}; Re-extracting data')
1402 continue
1403 except GeoRestrictedError as e:
1404 msg = e.msg
1405 if e.countries:
1406 msg += '\nThis video is available in %s.' % ', '.join(
1407 map(ISO3166Utils.short2full, e.countries))
 1408                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1409 self.report_error(msg)
1410 except ExtractorError as e: # An error we somewhat expected
1411 self.report_error(str(e), e.format_traceback())
1412 except Exception as e:
1413 if self.params.get('ignoreerrors'):
1414 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1415 else:
1416 raise
1417 break
a0566bbf 1418 return wrapper
1419
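    # Added commentary: __handle_extraction_exceptions is used as a decorator
    # (see __extract_info and __process_iterable_entry below). It loops on
    # ReExtractInfo, reports GeoRestrictedError/ExtractorError via report_error,
    # swallows other exceptions only when 'ignoreerrors' is set, and re-raises
    # DownloadCancelled and lazy-index errors untouched.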
f2ebc5c7 1420 def _wait_for_video(self, ie_result):
1421 if (not self.params.get('wait_for_video')
1422 or ie_result.get('_type', 'video') != 'video'
1423 or ie_result.get('formats') or ie_result.get('url')):
1424 return
1425
1426 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1427 last_msg = ''
1428
1429 def progress(msg):
1430 nonlocal last_msg
1431 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1432 last_msg = msg
1433
1434 min_wait, max_wait = self.params.get('wait_for_video')
1435 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1436 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1437 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1438 self.report_warning('Release time of video is not known')
1439 elif (diff or 0) <= 0:
1440 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1441 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1442 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1443
1444 wait_till = time.time() + diff
1445 try:
1446 while True:
1447 diff = wait_till - time.time()
1448 if diff <= 0:
1449 progress('')
1450 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1451 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1452 time.sleep(1)
1453 except KeyboardInterrupt:
1454 progress('')
1455 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1456 except BaseException as e:
1457 if not isinstance(e, ReExtractInfo):
1458 self.to_screen('')
1459 raise
1460
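    # Added commentary (illustrative, not part of the original source):
    # 'wait_for_video' is a (min_wait, max_wait) tuple in seconds, e.g.
    #
    #   params = {'wait_for_video': (60, 600)}  # hypothetical values
    #
    # A known release_timestamp is clamped into that range; for an upcoming
    # live stream without one, a random delay inside the range is used instead.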
a0566bbf 1461 @__handle_extraction_exceptions
58f197b7 1462 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1463 ie_result = ie.extract(url)
1464 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1465 return
1466 if isinstance(ie_result, list):
1467 # Backwards compatibility: old IE result format
1468 ie_result = {
1469 '_type': 'compat_list',
1470 'entries': ie_result,
1471 }
e37d0efb 1472 if extra_info.get('original_url'):
1473 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1474 self.add_default_extra_info(ie_result, ie, url)
1475 if process:
f2ebc5c7 1476 self._wait_for_video(ie_result)
a0566bbf 1477 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1478 else:
a0566bbf 1479 return ie_result
fe7e0c98 1480
ea38e55f 1481 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1482 if url is not None:
1483 self.add_extra_info(ie_result, {
1484 'webpage_url': url,
1485 'original_url': url,
57ebfca3 1486 })
1487 webpage_url = ie_result.get('webpage_url')
1488 if webpage_url:
1489 self.add_extra_info(ie_result, {
1490 'webpage_url_basename': url_basename(webpage_url),
1491 'webpage_url_domain': get_domain(webpage_url),
6033d980 1492 })
1493 if ie is not None:
1494 self.add_extra_info(ie_result, {
1495 'extractor': ie.IE_NAME,
1496 'extractor_key': ie.ie_key(),
1497 })
ea38e55f 1498
58adec46 1499 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1500 """
 1501         Take the result of the ie (it may be modified) and resolve all unresolved
1502 references (URLs, playlist items).
1503
 1504         It will also download the videos if 'download' is True.
1505 Returns the resolved ie_result.
1506 """
58adec46 1507 if extra_info is None:
1508 extra_info = {}
e8ee972c
PH
1509 result_type = ie_result.get('_type', 'video')
1510
057a5206 1511 if result_type in ('url', 'url_transparent'):
134c6ea8 1512 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1513 if ie_result.get('original_url'):
1514 extra_info.setdefault('original_url', ie_result['original_url'])
1515
057a5206 1516 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1517 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1518 or extract_flat is True):
ecb54191 1519 info_copy = ie_result.copy()
6033d980 1520 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1521 if ie and not ie_result.get('id'):
4614bc22 1522 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1523 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1524 self.add_extra_info(info_copy, extra_info)
b5475f11 1525 info_copy, _ = self.pre_process(info_copy)
ecb54191 1526 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1527 if self.params.get('force_write_download_archive', False):
1528 self.record_download_archive(info_copy)
e8ee972c
PH
1529 return ie_result
1530
8222d8de 1531 if result_type == 'video':
b6c45014 1532 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1533 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1534 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1535 if additional_urls:
e9f4ccd1 1536 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1537 if isinstance(additional_urls, compat_str):
1538 additional_urls = [additional_urls]
1539 self.to_screen(
1540 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1541 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1542 ie_result['additional_entries'] = [
1543 self.extract_info(
b69fd25c 1544 url, download, extra_info=extra_info,
9c2b75b5 1545 force_generic_extractor=self.params.get('force_generic_extractor'))
1546 for url in additional_urls
1547 ]
1548 return ie_result
8222d8de
JMF
1549 elif result_type == 'url':
1550 # We have to add extra_info to the results because it may be
1551 # contained in a playlist
07cce701 1552 return self.extract_info(
1553 ie_result['url'], download,
1554 ie_key=ie_result.get('ie_key'),
1555 extra_info=extra_info)
7fc3fa05
PH
1556 elif result_type == 'url_transparent':
1557 # Use the information from the embedding page
1558 info = self.extract_info(
1559 ie_result['url'], ie_key=ie_result.get('ie_key'),
1560 extra_info=extra_info, download=False, process=False)
1561
1640eb09
S
1562 # extract_info may return None when ignoreerrors is enabled and
1563 # extraction failed with an error, don't crash and return early
1564 # in this case
1565 if not info:
1566 return info
1567
412c617d 1568 new_result = info.copy()
90137ca4 1569 new_result.update(filter_dict(ie_result, lambda k, v: (
1570 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
7fc3fa05 1571
0563f7ac
S
1572 # Extracted info may not be a video result (i.e.
 1573             # info.get('_type', 'video') != 'video') but rather a URL or
1574 # url_transparent. In such cases outer metadata (from ie_result)
1575 # should be propagated to inner one (info). For this to happen
1576 # _type of info should be overridden with url_transparent. This
067aa17e 1577 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1578 if new_result.get('_type') == 'url':
1579 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1580
1581 return self.process_ie_result(
1582 new_result, download=download, extra_info=extra_info)
40fcba5e 1583 elif result_type in ('playlist', 'multi_video'):
30a074c2 1584 # Protect from infinite recursion due to recursively nested playlists
1585 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1586 webpage_url = ie_result['webpage_url']
1587 if webpage_url in self._playlist_urls:
7e85e872 1588 self.to_screen(
30a074c2 1589 '[download] Skipping already downloaded playlist: %s'
 1590                     % (ie_result.get('title') or ie_result.get('id')))
1591 return
7e85e872 1592
30a074c2 1593 self._playlist_level += 1
1594 self._playlist_urls.add(webpage_url)
03f83004 1595 self._fill_common_fields(ie_result, False)
bc516a3f 1596 self._sanitize_thumbnails(ie_result)
30a074c2 1597 try:
1598 return self.__process_playlist(ie_result, download)
1599 finally:
1600 self._playlist_level -= 1
1601 if not self._playlist_level:
1602 self._playlist_urls.clear()
8222d8de 1603 elif result_type == 'compat_list':
c9bf4114
PH
1604 self.report_warning(
1605 'Extractor %s returned a compat_list result. '
1606 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1607
8222d8de 1608 def _fixup(r):
b868936c 1609 self.add_extra_info(r, {
1610 'extractor': ie_result['extractor'],
1611 'webpage_url': ie_result['webpage_url'],
1612 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1613 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1614 'extractor_key': ie_result['extractor_key'],
1615 })
8222d8de
JMF
1616 return r
1617 ie_result['entries'] = [
b6c45014 1618 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1619 for r in ie_result['entries']
1620 ]
1621 return ie_result
1622 else:
1623 raise Exception('Invalid result type: %s' % result_type)
1624
e92caff5 1625 def _ensure_dir_exists(self, path):
1626 return make_dir(path, self.report_error)
1627
3b603dbd 1628 @staticmethod
1629 def _playlist_infodict(ie_result, **kwargs):
1630 return {
1631 **ie_result,
1632 'playlist': ie_result.get('title') or ie_result.get('id'),
1633 'playlist_id': ie_result.get('id'),
1634 'playlist_title': ie_result.get('title'),
1635 'playlist_uploader': ie_result.get('uploader'),
1636 'playlist_uploader_id': ie_result.get('uploader_id'),
1637 'playlist_index': 0,
1638 **kwargs,
1639 }
1640
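    # Added commentary: _playlist_infodict() builds the info dict used for
    # playlist-level artifacts (pl_infojson, pl_description, pl_thumbnail
    # below), exposing fields such as playlist_title and playlist_uploader to
    # their output templates.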
30a074c2 1641 def __process_playlist(self, ie_result, download):
1642 # We process each entry in the playlist
1643 playlist = ie_result.get('title') or ie_result.get('id')
1644 self.to_screen('[download] Downloading playlist: %s' % playlist)
1645
498f5606 1646 if 'entries' not in ie_result:
aa9369a2 1647 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1648
1649 MissingEntry = object()
498f5606 1650 incomplete_entries = bool(ie_result.get('requested_entries'))
1651 if incomplete_entries:
bf5f605e 1652 def fill_missing_entries(entries, indices):
7c7f7161 1653 ret = [MissingEntry] * max(indices)
bf5f605e 1654 for i, entry in zip(indices, entries):
498f5606 1655 ret[i - 1] = entry
1656 return ret
1657 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1658
30a074c2 1659 playlist_results = []
1660
56a8fb4f 1661 playliststart = self.params.get('playliststart', 1)
30a074c2 1662 playlistend = self.params.get('playlistend')
1663 # For backwards compatibility, interpret -1 as whole list
1664 if playlistend == -1:
1665 playlistend = None
1666
1667 playlistitems_str = self.params.get('playlist_items')
1668 playlistitems = None
1669 if playlistitems_str is not None:
1670 def iter_playlistitems(format):
1671 for string_segment in format.split(','):
1672 if '-' in string_segment:
1673 start, end = string_segment.split('-')
1674 for item in range(int(start), int(end) + 1):
1675 yield int(item)
1676 else:
1677 yield int(string_segment)
1678 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1679
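        # Added commentary (illustrative): the --playlist-items syntax parsed
        # above accepts comma-separated indices and ranges, e.g. '1-3,7' yields
        # [1, 2, 3, 7]; orderedSet() drops duplicates while keeping the
        # original order.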
1680 ie_entries = ie_result['entries']
8e5fecc8 1681 if isinstance(ie_entries, list):
ed8d87f9 1682 playlist_count = len(ie_entries)
f0d785d3 1683 msg = f'Collected {playlist_count} videos; downloading %d of them'
1684 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1685
8e5fecc8 1686 def get_entry(i):
1687 return ie_entries[i - 1]
1688 else:
f0d785d3 1689 msg = 'Downloading %d videos'
c586f9e8 1690 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1691 ie_entries = LazyList(ie_entries)
d37707bd 1692 elif isinstance(ie_entries, InAdvancePagedList):
1693 if ie_entries._pagesize == 1:
1694 playlist_count = ie_entries._pagecount
8e5fecc8 1695
1696 def get_entry(i):
1697 return YoutubeDL.__handle_extraction_exceptions(
1698 lambda self, i: ie_entries[i - 1]
1699 )(self, i)
50fed816 1700
f0d785d3 1701 entries, broken = [], False
ff1c7fc9 1702 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1703 for i in items:
1704 if i == 0:
1705 continue
56a8fb4f 1706 if playlistitems is None and playlistend is not None and playlistend < i:
1707 break
1708 entry = None
1709 try:
50fed816 1710 entry = get_entry(i)
7c7f7161 1711 if entry is MissingEntry:
498f5606 1712 raise EntryNotInPlaylist()
56a8fb4f 1713 except (IndexError, EntryNotInPlaylist):
1714 if incomplete_entries:
aa9369a2 1715 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1716 elif not playlistitems:
1717 break
1718 entries.append(entry)
120fe513 1719 try:
1720 if entry is not None:
e5a998f3 1721 # TODO: Add auto-generated fields
120fe513 1722 self._match_entry(entry, incomplete=True, silent=True)
1723 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1724 broken = True
120fe513 1725 break
56a8fb4f 1726 ie_result['entries'] = entries
30a074c2 1727
56a8fb4f 1728 # Save playlist_index before re-ordering
1729 entries = [
9e598870 1730 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1731 for i, entry in enumerate(entries, 1)
1732 if entry is not None]
1733 n_entries = len(entries)
498f5606 1734
f0d785d3 1735 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1736 ie_result['playlist_count'] = n_entries
1737
e08a85d8 1738 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1739 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1740 ie_result['requested_entries'] = playlistitems
1741
e08a85d8 1742 _infojson_written = False
0bfc53d0 1743 write_playlist_files = self.params.get('allow_playlist_files', True)
1744 if write_playlist_files and self.params.get('list_thumbnails'):
1745 self.list_thumbnails(ie_result)
1746 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1747 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1748 _infojson_written = self._write_info_json(
1749 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1750 if _infojson_written is None:
80c03fa9 1751 return
1752 if self._write_description('playlist', ie_result,
1753 self.prepare_filename(ie_copy, 'pl_description')) is None:
1754 return
681de68e 1755 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1756 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1757
1758 if self.params.get('playlistreverse', False):
1759 entries = entries[::-1]
30a074c2 1760 if self.params.get('playlistrandom', False):
1761 random.shuffle(entries)
1762
1763 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1764
86e5f3ed 1765 self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
26e2805c 1766 failures = 0
1767 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1768 for i, entry_tuple in enumerate(entries, 1):
1769 playlist_index, entry = entry_tuple
81139999 1770 if 'playlist-index' in self.params.get('compat_opts', []):
1771 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
19a03940 1772 self.to_screen('[download] Downloading video %s of %s' % (
1773 self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
30a074c2 1774 # This __x_forwarded_for_ip thing is a bit ugly but requires
1775 # minimal changes
1776 if x_forwarded_for:
1777 entry['__x_forwarded_for_ip'] = x_forwarded_for
1778 extra = {
1779 'n_entries': n_entries,
0a5a191a 1780 '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1781 'playlist_count': ie_result.get('playlist_count'),
71729754 1782 'playlist_index': playlist_index,
1783 'playlist_autonumber': i,
30a074c2 1784 'playlist': playlist,
1785 'playlist_id': ie_result.get('id'),
1786 'playlist_title': ie_result.get('title'),
1787 'playlist_uploader': ie_result.get('uploader'),
1788 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1789 'extractor': ie_result['extractor'],
1790 'webpage_url': ie_result['webpage_url'],
1791 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1792 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1793 'extractor_key': ie_result['extractor_key'],
1794 }
1795
1796 if self._match_entry(entry, incomplete=True) is not None:
1797 continue
1798
1799 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1800 if not entry_result:
1801 failures += 1
1802 if failures >= max_failures:
1803 self.report_error(
1804 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1805 break
30a074c2 1806 playlist_results.append(entry_result)
1807 ie_result['entries'] = playlist_results
e08a85d8 1808
1809 # Write the updated info to json
cb96c5be 1810 if _infojson_written is True and self._write_info_json(
e08a85d8 1811 'updated playlist', ie_result,
1812 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1813 return
ca30f449 1814
ed5835b4 1815 ie_result = self.run_all_pps('playlist', ie_result)
1816 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1817 return ie_result
1818
a0566bbf 1819 @__handle_extraction_exceptions
1820 def __process_iterable_entry(self, entry, download, extra_info):
1821 return self.process_ie_result(
1822 entry, download=download, extra_info=extra_info)
1823
67134eab
JMF
1824 def _build_format_filter(self, filter_spec):
1825 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1826
1827 OPERATORS = {
1828 '<': operator.lt,
1829 '<=': operator.le,
1830 '>': operator.gt,
1831 '>=': operator.ge,
1832 '=': operator.eq,
1833 '!=': operator.ne,
1834 }
67134eab 1835 operator_rex = re.compile(r'''(?x)\s*
187986a8 1836 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1837 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1838 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1839 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1840 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1841 if m:
1842 try:
1843 comparison_value = int(m.group('value'))
1844 except ValueError:
1845 comparison_value = parse_filesize(m.group('value'))
1846 if comparison_value is None:
1847 comparison_value = parse_filesize(m.group('value') + 'B')
1848 if comparison_value is None:
1849 raise ValueError(
1850 'Invalid value %r in format specification %r' % (
67134eab 1851 m.group('value'), filter_spec))
9ddb6925
S
1852 op = OPERATORS[m.group('op')]
1853
083c9df9 1854 if not m:
9ddb6925
S
1855 STR_OPERATORS = {
1856 '=': operator.eq,
10d33b34
YCH
1857 '^=': lambda attr, value: attr.startswith(value),
1858 '$=': lambda attr, value: attr.endswith(value),
1859 '*=': lambda attr, value: value in attr,
1ce9a3cb 1860 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1861 }
187986a8 1862 str_operator_rex = re.compile(r'''(?x)\s*
1863 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1864 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1865 (?P<quote>["'])?
1866 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1867 (?(quote)(?P=quote))\s*
9ddb6925 1868 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1869 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1870 if m:
1ce9a3cb
LF
1871 if m.group('op') == '~=':
1872 comparison_value = re.compile(m.group('value'))
1873 else:
1874 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1875 str_op = STR_OPERATORS[m.group('op')]
1876 if m.group('negation'):
e118a879 1877 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1878 else:
1879 op = str_op
083c9df9 1880
9ddb6925 1881 if not m:
187986a8 1882 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1883
1884 def _filter(f):
1885 actual_value = f.get(m.group('key'))
1886 if actual_value is None:
1887 return m.group('none_inclusive')
1888 return op(actual_value, comparison_value)
67134eab
JMF
1889 return _filter
1890
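    # Added commentary (illustrative, not part of the original source): the
    # bracketed filters of a format spec map onto this builder; values are
    # hypothetical:
    #
    #   f = ydl._build_format_filter('height<=720')
    #   f({'height': 480})    # -> True
    #   f({'height': 1080})   # -> False
    #   f({})                 # falsy unless the spec is written 'height<=?720'
    #
    # String fields support =, ^=, $=, *= and ~= (regex), optionally negated
    # with '!'.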
9f1a1c36 1891 def _check_formats(self, formats):
1892 for f in formats:
1893 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1894 path = self.get_output_path('temp')
1895 if not self._ensure_dir_exists(f'{path}/'):
1896 continue
1897 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1898 temp_file.close()
1899 try:
1900 success, _ = self.dl(temp_file.name, f, test=True)
1901 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1902 success = False
1903 finally:
1904 if os.path.exists(temp_file.name):
1905 try:
1906 os.remove(temp_file.name)
1907 except OSError:
1908 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1909 if success:
1910 yield f
1911 else:
1912 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1913
0017d9ad 1914 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1915
af0f7428
S
1916 def can_merge():
1917 merger = FFmpegMergerPP(self)
1918 return merger.available and merger.can_merge()
1919
91ebc640 1920 prefer_best = (
b7b04c78 1921 not self.params.get('simulate')
91ebc640 1922 and download
1923 and (
1924 not can_merge()
19807826 1925 or info_dict.get('is_live', False)
de6000d9 1926 or self.outtmpl_dict['default'] == '-'))
53ed7066 1927 compat = (
1928 prefer_best
1929 or self.params.get('allow_multiple_audio_streams', False)
1930 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1931
1932 return (
53ed7066 1933 'best/bestvideo+bestaudio' if prefer_best
1934 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1935 else 'bestvideo+bestaudio/best')
0017d9ad 1936
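    # Added commentary: the default spec therefore resolves to
    # 'bestvideo*+bestaudio/best' normally, to 'best/bestvideo+bestaudio' when
    # merging is impossible (no working FFmpegMergerPP), for live streams or
    # when writing to stdout, and to 'bestvideo+bestaudio/best' when the
    # 'format-spec' compat option or multiple audio streams are allowed.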
67134eab
JMF
1937 def build_format_selector(self, format_spec):
1938 def syntax_error(note, start):
1939 message = (
1940 'Invalid format specification: '
86e5f3ed 1941 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
1942 return SyntaxError(message)
1943
1944 PICKFIRST = 'PICKFIRST'
1945 MERGE = 'MERGE'
1946 SINGLE = 'SINGLE'
0130afb7 1947 GROUP = 'GROUP'
67134eab
JMF
1948 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1949
91ebc640 1950 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1951 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1952
9f1a1c36 1953 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1954
67134eab
JMF
1955 def _parse_filter(tokens):
1956 filter_parts = []
1957 for type, string, start, _, _ in tokens:
1958 if type == tokenize.OP and string == ']':
1959 return ''.join(filter_parts)
1960 else:
1961 filter_parts.append(string)
1962
232541df 1963 def _remove_unused_ops(tokens):
17cc1534 1964 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1965 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1966 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1967 last_string, last_start, last_end, last_line = None, None, None, None
1968 for type, string, start, end, line in tokens:
1969 if type == tokenize.OP and string == '[':
1970 if last_string:
1971 yield tokenize.NAME, last_string, last_start, last_end, last_line
1972 last_string = None
1973 yield type, string, start, end, line
1974 # everything inside brackets will be handled by _parse_filter
1975 for type, string, start, end, line in tokens:
1976 yield type, string, start, end, line
1977 if type == tokenize.OP and string == ']':
1978 break
1979 elif type == tokenize.OP and string in ALLOWED_OPS:
1980 if last_string:
1981 yield tokenize.NAME, last_string, last_start, last_end, last_line
1982 last_string = None
1983 yield type, string, start, end, line
1984 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1985 if not last_string:
1986 last_string = string
1987 last_start = start
1988 last_end = end
1989 else:
1990 last_string += string
1991 if last_string:
1992 yield tokenize.NAME, last_string, last_start, last_end, last_line
1993
cf2ac6df 1994 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1995 selectors = []
1996 current_selector = None
1997 for type, string, start, _, _ in tokens:
1998 # ENCODING is only defined in python 3.x
1999 if type == getattr(tokenize, 'ENCODING', None):
2000 continue
2001 elif type in [tokenize.NAME, tokenize.NUMBER]:
2002 current_selector = FormatSelector(SINGLE, string, [])
2003 elif type == tokenize.OP:
cf2ac6df
JMF
2004 if string == ')':
2005 if not inside_group:
2006 # ')' will be handled by the parentheses group
2007 tokens.restore_last_token()
67134eab 2008 break
cf2ac6df 2009 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2010 tokens.restore_last_token()
2011 break
cf2ac6df
JMF
2012 elif inside_choice and string == ',':
2013 tokens.restore_last_token()
2014 break
2015 elif string == ',':
0a31a350
JMF
2016 if not current_selector:
2017 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2018 selectors.append(current_selector)
2019 current_selector = None
2020 elif string == '/':
d96d604e
JMF
2021 if not current_selector:
2022 raise syntax_error('"/" must follow a format selector', start)
67134eab 2023 first_choice = current_selector
cf2ac6df 2024 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2025 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2026 elif string == '[':
2027 if not current_selector:
2028 current_selector = FormatSelector(SINGLE, 'best', [])
2029 format_filter = _parse_filter(tokens)
2030 current_selector.filters.append(format_filter)
0130afb7
JMF
2031 elif string == '(':
2032 if current_selector:
2033 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2034 group = _parse_format_selection(tokens, inside_group=True)
2035 current_selector = FormatSelector(GROUP, group, [])
67134eab 2036 elif string == '+':
d03cfdce 2037 if not current_selector:
2038 raise syntax_error('Unexpected "+"', start)
2039 selector_1 = current_selector
2040 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2041 if not selector_2:
2042 raise syntax_error('Expected a selector', start)
2043 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2044 else:
86e5f3ed 2045 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2046 elif type == tokenize.ENDMARKER:
2047 break
2048 if current_selector:
2049 selectors.append(current_selector)
2050 return selectors
2051
f8d4ad9a 2052 def _merge(formats_pair):
2053 format_1, format_2 = formats_pair
2054
2055 formats_info = []
2056 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2057 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2058
2059 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2060 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2061 for (i, fmt_info) in enumerate(formats_info):
551f9388 2062 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2063 formats_info.pop(i)
2064 continue
2065 for aud_vid in ['audio', 'video']:
f8d4ad9a 2066 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2067 if get_no_more[aud_vid]:
2068 formats_info.pop(i)
f5510afe 2069 break
f8d4ad9a 2070 get_no_more[aud_vid] = True
2071
2072 if len(formats_info) == 1:
2073 return formats_info[0]
2074
2075 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2076 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2077
2078 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2079 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2080
2081 output_ext = self.params.get('merge_output_format')
2082 if not output_ext:
2083 if the_only_video:
2084 output_ext = the_only_video['ext']
2085 elif the_only_audio and not video_fmts:
2086 output_ext = the_only_audio['ext']
2087 else:
2088 output_ext = 'mkv'
2089
975a0d0d 2090 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2091
f8d4ad9a 2092 new_dict = {
2093 'requested_formats': formats_info,
975a0d0d 2094 'format': '+'.join(filtered('format')),
2095 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2096 'ext': output_ext,
975a0d0d 2097 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2098 'language': '+'.join(orderedSet(filtered('language'))) or None,
2099 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2100 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2101 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2102 }
2103
2104 if the_only_video:
2105 new_dict.update({
2106 'width': the_only_video.get('width'),
2107 'height': the_only_video.get('height'),
2108 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2109 'fps': the_only_video.get('fps'),
49a57e70 2110 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2111 'vcodec': the_only_video.get('vcodec'),
2112 'vbr': the_only_video.get('vbr'),
2113 'stretched_ratio': the_only_video.get('stretched_ratio'),
2114 })
2115
2116 if the_only_audio:
2117 new_dict.update({
2118 'acodec': the_only_audio.get('acodec'),
2119 'abr': the_only_audio.get('abr'),
975a0d0d 2120 'asr': the_only_audio.get('asr'),
f8d4ad9a 2121 })
2122
2123 return new_dict
2124
e8e73840 2125 def _check_formats(formats):
981052c9 2126 if not check_formats:
2127 yield from formats
b5ac45b1 2128 return
9f1a1c36 2129 yield from self._check_formats(formats)
e8e73840 2130
67134eab 2131 def _build_selector_function(selector):
909d24dd 2132 if isinstance(selector, list): # ,
67134eab
JMF
2133 fs = [_build_selector_function(s) for s in selector]
2134
317f7ab6 2135 def selector_function(ctx):
67134eab 2136 for f in fs:
981052c9 2137 yield from f(ctx)
67134eab 2138 return selector_function
909d24dd 2139
2140 elif selector.type == GROUP: # ()
0130afb7 2141 selector_function = _build_selector_function(selector.selector)
909d24dd 2142
2143 elif selector.type == PICKFIRST: # /
67134eab
JMF
2144 fs = [_build_selector_function(s) for s in selector.selector]
2145
317f7ab6 2146 def selector_function(ctx):
67134eab 2147 for f in fs:
317f7ab6 2148 picked_formats = list(f(ctx))
67134eab
JMF
2149 if picked_formats:
2150 return picked_formats
2151 return []
67134eab 2152
981052c9 2153 elif selector.type == MERGE: # +
2154 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2155
2156 def selector_function(ctx):
adbc4ec4 2157 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2158 yield _merge(pair)
2159
909d24dd 2160 elif selector.type == SINGLE: # atom
598d185d 2161 format_spec = selector.selector or 'best'
909d24dd 2162
f8d4ad9a 2163 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2164 if format_spec == 'all':
2165 def selector_function(ctx):
9222c381 2166 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2167 elif format_spec == 'mergeall':
2168 def selector_function(ctx):
316f2650 2169 formats = list(_check_formats(
2170 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2171 if not formats:
2172 return
921b76ca 2173 merged_format = formats[-1]
2174 for f in formats[-2::-1]:
f8d4ad9a 2175 merged_format = _merge((merged_format, f))
2176 yield merged_format
909d24dd 2177
2178 else:
85e801a9 2179                 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2180 mobj = re.match(
2181 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2182 format_spec)
2183 if mobj is not None:
2184 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2185 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2186 format_type = (mobj.group('type') or [None])[0]
2187 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2188 format_modified = mobj.group('mod') is not None
909d24dd 2189
2190 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2191 _filter_f = (
eff63539 2192 (lambda f: f.get('%scodec' % format_type) != 'none')
2193 if format_type and format_modified # bv*, ba*, wv*, wa*
2194 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2195 if format_type # bv, ba, wv, wa
2196 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2197 if not format_modified # b, w
8326b00a 2198 else lambda f: True) # b*, w*
2199 filter_f = lambda f: _filter_f(f) and (
2200 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2201 else:
48ee10ee 2202 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2203 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2204 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2205 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2206                         separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2207 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2208 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2209 else:
b5ae35ee 2210 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2211
2212 def selector_function(ctx):
2213 formats = list(ctx['formats'])
909d24dd 2214 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2215 if not matches:
2216 if format_fallback and ctx['incomplete_formats']:
2217 # for extractors with incomplete formats (audio only (soundcloud)
 2218                             # or video only (imgur)) best/worst will fall back to
2219 # best/worst {video,audio}-only format
2220 matches = formats
 2221                         elif separate_fallback and not ctx['has_merged_format']:
2222 # for compatibility with youtube-dl when there is no pre-merged format
 2223                             matches = list(filter(separate_fallback, formats))
981052c9 2224 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2225 try:
e8e73840 2226 yield matches[format_idx - 1]
4abea8ca 2227 except LazyList.IndexError:
981052c9 2228 return
083c9df9 2229
67134eab 2230 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2231
317f7ab6 2232 def final_selector(ctx):
adbc4ec4 2233 ctx_copy = dict(ctx)
67134eab 2234 for _filter in filters:
317f7ab6
S
2235 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2236 return selector_function(ctx_copy)
67134eab 2237 return final_selector
083c9df9 2238
67134eab 2239 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2240 try:
f9934b96 2241 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2242 except tokenize.TokenError:
2243 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2244
86e5f3ed 2245 class TokenIterator:
0130afb7
JMF
2246 def __init__(self, tokens):
2247 self.tokens = tokens
2248 self.counter = 0
2249
2250 def __iter__(self):
2251 return self
2252
2253 def __next__(self):
2254 if self.counter >= len(self.tokens):
2255 raise StopIteration()
2256 value = self.tokens[self.counter]
2257 self.counter += 1
2258 return value
2259
2260 next = __next__
2261
2262 def restore_last_token(self):
2263 self.counter -= 1
2264
2265 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2266 return _build_selector_function(parsed_selector)
a9c58ad9 2267
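    # Added commentary (illustrative, not part of the original source): the
    # returned selector is a callable over a context dict; the keys below
    # mirror what process_video_result() passes further down:
    #
    #   selector = ydl.build_format_selector('bestvideo+bestaudio/best')
    #   chosen = list(selector({'formats': formats,          # hypothetical list
    #                           'has_merged_format': True,
    #                           'incomplete_formats': False}))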
e5660ee6 2268 def _calc_headers(self, info_dict):
8b7539d2 2269 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6
JMF
2270
2271 cookies = self._calc_cookies(info_dict)
2272 if cookies:
2273 res['Cookie'] = cookies
2274
0016b84e
S
2275 if 'X-Forwarded-For' not in res:
2276 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2277 if x_forwarded_for_ip:
2278 res['X-Forwarded-For'] = x_forwarded_for_ip
2279
e5660ee6
JMF
2280 return res
2281
2282 def _calc_cookies(self, info_dict):
5c2266df 2283 pr = sanitized_Request(info_dict['url'])
e5660ee6 2284 self.cookiejar.add_cookie_header(pr)
662435f7 2285 return pr.get_header('Cookie')
e5660ee6 2286
9f1a1c36 2287 def _sort_thumbnails(self, thumbnails):
2288 thumbnails.sort(key=lambda t: (
2289 t.get('preference') if t.get('preference') is not None else -1,
2290 t.get('width') if t.get('width') is not None else -1,
2291 t.get('height') if t.get('height') is not None else -1,
2292 t.get('id') if t.get('id') is not None else '',
2293 t.get('url')))
2294
b0249bca 2295 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2296 thumbnails = info_dict.get('thumbnails')
2297 if thumbnails is None:
2298 thumbnail = info_dict.get('thumbnail')
2299 if thumbnail:
2300 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2301 if not thumbnails:
2302 return
2303
2304 def check_thumbnails(thumbnails):
2305 for t in thumbnails:
2306 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2307 try:
2308 self.urlopen(HEADRequest(t['url']))
2309 except network_exceptions as err:
2310 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2311 continue
2312 yield t
2313
2314 self._sort_thumbnails(thumbnails)
2315 for i, t in enumerate(thumbnails):
2316 if t.get('id') is None:
2317 t['id'] = '%d' % i
2318 if t.get('width') and t.get('height'):
2319 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2320 t['url'] = sanitize_url(t['url'])
2321
2322 if self.params.get('check_formats') is True:
282f5709 2323 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2324 else:
2325 info_dict['thumbnails'] = thumbnails
bc516a3f 2326
03f83004
LNO
2327 def _fill_common_fields(self, info_dict, is_video=True):
2328 # TODO: move sanitization here
2329 if is_video:
2330 # playlists are allowed to lack "title"
2331 info_dict['fulltitle'] = info_dict.get('title')
2332 if 'title' not in info_dict:
2333 raise ExtractorError('Missing "title" field in extractor result',
2334 video_id=info_dict['id'], ie=info_dict['extractor'])
2335 elif not info_dict.get('title'):
2336 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2337 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2338
2339 if info_dict.get('duration') is not None:
2340 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2341
2342 for ts_key, date_key in (
2343 ('timestamp', 'upload_date'),
2344 ('release_timestamp', 'release_date'),
2345 ('modified_timestamp', 'modified_date'),
2346 ):
2347 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2348 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2349 # see http://bugs.python.org/issue1646728)
19a03940 2350 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2351 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2352 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2353
2354 live_keys = ('is_live', 'was_live')
2355 live_status = info_dict.get('live_status')
2356 if live_status is None:
2357 for key in live_keys:
2358 if info_dict.get(key) is False:
2359 continue
2360 if info_dict.get(key):
2361 live_status = key
2362 break
2363 if all(info_dict.get(key) is False for key in live_keys):
2364 live_status = 'not_live'
2365 if live_status:
2366 info_dict['live_status'] = live_status
2367 for key in live_keys:
2368 if info_dict.get(key) is None:
2369 info_dict[key] = (live_status == key)
2370
2371 # Auto generate title fields corresponding to the *_number fields when missing
2372 # in order to always have clean titles. This is very common for TV series.
2373 for field in ('chapter', 'season', 'episode'):
2374 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2375 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2376
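    # Added commentary (illustrative): given a hypothetical result containing
    # {'timestamp': 1640995200, 'season_number': 2}, _fill_common_fields() adds
    # 'upload_date': '20220101' and 'season': 'Season 2', and back-fills
    # 'live_status' plus is_live/was_live when only some of them are present.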
dd82ffea
JMF
2377 def process_video_result(self, info_dict, download=True):
2378 assert info_dict.get('_type', 'video') == 'video'
9c906919 2379 self._num_videos += 1
dd82ffea 2380
bec1fad2 2381 if 'id' not in info_dict:
fc08bdd6 2382 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2383 elif not info_dict.get('id'):
2384 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2385
c9969434
S
2386 def report_force_conversion(field, field_not, conversion):
2387 self.report_warning(
2388 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2389 % (field, field_not, conversion))
2390
2391 def sanitize_string_field(info, string_field):
2392 field = info.get(string_field)
2393 if field is None or isinstance(field, compat_str):
2394 return
2395 report_force_conversion(string_field, 'a string', 'string')
2396 info[string_field] = compat_str(field)
2397
2398 def sanitize_numeric_fields(info):
2399 for numeric_field in self._NUMERIC_FIELDS:
2400 field = info.get(numeric_field)
f9934b96 2401 if field is None or isinstance(field, (int, float)):
c9969434
S
2402 continue
2403 report_force_conversion(numeric_field, 'numeric', 'int')
2404 info[numeric_field] = int_or_none(field)
2405
2406 sanitize_string_field(info_dict, 'id')
2407 sanitize_numeric_fields(info_dict)
4c3f8c3f 2408 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2409 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2410
dd82ffea
JMF
2411 if 'playlist' not in info_dict:
2412 # It isn't part of a playlist
2413 info_dict['playlist'] = None
2414 info_dict['playlist_index'] = None
2415
bc516a3f 2416 self._sanitize_thumbnails(info_dict)
d5519808 2417
536a55da 2418 thumbnail = info_dict.get('thumbnail')
bc516a3f 2419 thumbnails = info_dict.get('thumbnails')
536a55da
S
2420 if thumbnail:
2421 info_dict['thumbnail'] = sanitize_url(thumbnail)
2422 elif thumbnails:
d5519808
PH
2423 info_dict['thumbnail'] = thumbnails[-1]['url']
2424
ae30b840 2425 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2426 info_dict['display_id'] = info_dict['id']
2427
03f83004 2428 self._fill_common_fields(info_dict)
33d2fc2f 2429
05108a49
S
2430 for cc_kind in ('subtitles', 'automatic_captions'):
2431 cc = info_dict.get(cc_kind)
2432 if cc:
2433 for _, subtitle in cc.items():
2434 for subtitle_format in subtitle:
2435 if subtitle_format.get('url'):
2436 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2437 if subtitle_format.get('ext') is None:
2438 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2439
2440 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2441 subtitles = info_dict.get('subtitles')
4bba3716 2442
360e1ca5 2443 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2444 info_dict['id'], subtitles, automatic_captions)
a504ced0 2445
dd82ffea
JMF
2446 if info_dict.get('formats') is None:
2447 # There's only one format available
2448 formats = [info_dict]
2449 else:
2450 formats = info_dict['formats']
2451
0a5a191a 2452 # or None ensures --clean-infojson removes it
2453 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2454 if not self.params.get('allow_unplayable_formats'):
2455 formats = [f for f in formats if not f.get('has_drm')]
0a5a191a 2456 if info_dict['_has_drm'] and all(
c0b6e5c7 2457 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2458 self.report_warning(
2459 'This video is DRM protected and only images are available for download. '
2460 'Use --list-formats to see them')
88acdbc2 2461
319b6059 2462 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2463 if not get_from_start:
2464 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2465 if info_dict.get('is_live') and formats:
adbc4ec4 2466 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2467 if get_from_start and not formats:
a44ca5a4 2468 self.raise_no_formats(info_dict, msg=(
2469 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2470 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2471
db95dc13 2472 if not formats:
1151c407 2473 self.raise_no_formats(info_dict)
db95dc13 2474
73af5cc8
S
2475 def is_wellformed(f):
2476 url = f.get('url')
a5ac0c47 2477 if not url:
73af5cc8
S
2478 self.report_warning(
2479 '"url" field is missing or empty - skipping format, '
2480 'there is an error in extractor')
a5ac0c47
S
2481 return False
2482 if isinstance(url, bytes):
2483 sanitize_string_field(f, 'url')
2484 return True
73af5cc8
S
2485
2486 # Filter out malformed formats for better extraction robustness
2487 formats = list(filter(is_wellformed, formats))
2488
181c7053
S
2489 formats_dict = {}
2490
dd82ffea 2491 # We check that all the formats have the format and format_id fields
db95dc13 2492 for i, format in enumerate(formats):
c9969434
S
2493 sanitize_string_field(format, 'format_id')
2494 sanitize_numeric_fields(format)
dcf77cf1 2495 format['url'] = sanitize_url(format['url'])
e74e3b63 2496 if not format.get('format_id'):
8016c922 2497 format['format_id'] = compat_str(i)
e2effb08
S
2498 else:
2499 # Sanitize format_id from characters used in format selector expression
ec85ded8 2500 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2501 format_id = format['format_id']
2502 if format_id not in formats_dict:
2503 formats_dict[format_id] = []
2504 formats_dict[format_id].append(format)
2505
2506 # Make sure all formats have unique format_id
03b4de72 2507 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2508 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2509             ambiguous_id = len(ambiguous_formats) > 1
2510 for i, format in enumerate(ambiguous_formats):
 2511                 if ambiguous_id:
181c7053 2512 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2513 if format.get('ext') is None:
2514 format['ext'] = determine_ext(format['url']).lower()
2515 # Ensure there is no conflict between id and ext in format selection
2516 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2517 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2518 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2519
2520 for i, format in enumerate(formats):
8c51aa65 2521 if format.get('format') is None:
6febd1c1 2522 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2523 id=format['format_id'],
2524 res=self.format_resolution(format),
b868936c 2525 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2526 )
6f0be937 2527 if format.get('protocol') is None:
b5559424 2528 format['protocol'] = determine_protocol(format)
239df021 2529 if format.get('resolution') is None:
2530 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2531 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2532 format['dynamic_range'] = 'SDR'
f2fe69c7 2533 if (info_dict.get('duration') and format.get('tbr')
2534 and not format.get('filesize') and not format.get('filesize_approx')):
2535 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2536
e5660ee6
JMF
2537 # Add HTTP headers, so that external programs can use them from the
2538 # json output
2539 full_format_info = info_dict.copy()
2540 full_format_info.update(format)
2541 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2542 # Remove private housekeeping stuff
2543 if '__x_forwarded_for_ip' in info_dict:
2544 del info_dict['__x_forwarded_for_ip']
dd82ffea 2545
9f1a1c36 2546 if self.params.get('check_formats') is True:
282f5709 2547 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2548
88acdbc2 2549 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
 2550             # only set the 'formats' field if the original info_dict lists them;
 2551             # otherwise we would end up with a circular reference: the first (and only)
f89197d7 2552 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2553 # which can't be exported to json
b3d9ef88 2554 info_dict['formats'] = formats
4ec82a72 2555
2556 info_dict, _ = self.pre_process(info_dict)
2557
6db9c4d5 2558 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2559 return info_dict
2560
2561 self.post_extract(info_dict)
2562 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2563
093a1710 2564 # The pre-processors may have modified the formats
2565 formats = info_dict.get('formats', [info_dict])
2566
fa9f30b8 2567 list_only = self.params.get('simulate') is None and (
2568 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2569 interactive_format_selection = not list_only and self.format_selector == '-'
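# A format selector of '-' (i.e. -f -) switches to interactive format selection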
b7b04c78 2570 if self.params.get('list_thumbnails'):
2571 self.list_thumbnails(info_dict)
b7b04c78 2572 if self.params.get('listsubtitles'):
2573 if 'automatic_captions' in info_dict:
2574 self.list_subtitles(
2575 info_dict['id'], automatic_captions, 'automatic captions')
2576 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2577 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2578 self.list_formats(info_dict)
169dbde9 2579 if list_only:
b7b04c78 2580 # Without this printing, -F --print-json will not work
169dbde9 2581 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2582 return
2583
187986a8 2584 format_selector = self.format_selector
2585 if format_selector is None:
0017d9ad 2586 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2587 self.write_debug('Default format spec: %s' % req_format)
187986a8 2588 format_selector = self.build_format_selector(req_format)
317f7ab6 2589
fa9f30b8 2590 while True:
2591 if interactive_format_selection:
2592 req_format = input(
2593 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2594 try:
2595 format_selector = self.build_format_selector(req_format)
2596 except SyntaxError as err:
2597 self.report_error(err, tb=False, is_error=False)
2598 continue
2599
85e801a9 2600 formats_to_download = list(format_selector({
fa9f30b8 2601 'formats': formats,
85e801a9 2602 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2603 'incomplete_formats': (
2604 # All formats are video-only or
2605 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2606 # all formats are audio-only
2607 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2608 }))
fa9f30b8 2609 if interactive_format_selection and not formats_to_download:
2610 self.report_error('Requested format is not available', tb=False, is_error=False)
2611 continue
2612 break
317f7ab6 2613
dd82ffea 2614 if not formats_to_download:
b7da73eb 2615 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2616 raise ExtractorError(
2617 'Requested format is not available. Use --list-formats for a list of available formats',
2618 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2619 self.report_warning('Requested format is not available')
2620 # Process what we can, even without any available formats.
2621 formats_to_download = [{}]
a13e6848 2622
b62fa6d7 2623 best_format = formats_to_download[-1]
2624 if download:
2625 if best_format:
2626 self.to_screen(
2627 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2628 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2629 max_downloads_reached = False
f46e2f9d 2630 for i, fmt in enumerate(formats_to_download):
09b49e1f 2631 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2632 new_info.update(fmt)
a13e6848 2633 try:
2634 self.process_info(new_info)
2635 except MaxDownloadsReached:
2636 max_downloads_reached = True
f46e2f9d 2637 # Remove copied info
2638 for key, val in tuple(new_info.items()):
2639 if info_dict.get(key) == val:
2640 new_info.pop(key)
a13e6848 2641 if max_downloads_reached:
2642 break
ebed8b37 2643
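# Record in the download archive only if at least one format set the flag to True
# and none set it to False; 'ignore' values do not block archiving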
86e5f3ed 2644 write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
a13e6848 2645 assert write_archive.issubset({True, False, 'ignore'})
2646 if True in write_archive and False not in write_archive:
2647 self.record_download_archive(info_dict)
be72c624 2648
2649 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2650 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2651 if max_downloads_reached:
2652 raise MaxDownloadsReached()
ebed8b37 2653
49a57e70 2654 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2655 info_dict.update(best_format)
dd82ffea 2656 return info_dict
2657
98c70d6f 2658 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2659 """Select the requested subtitles and their format"""
d8a58ddc 2660 available_subs, normal_sub_langs = {}, []
98c70d6f 2661 if normal_subtitles and self.params.get('writesubtitles'):
2662 available_subs.update(normal_subtitles)
d8a58ddc 2663 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f 2664 if automatic_captions and self.params.get('writeautomaticsub'):
2665 for lang, cap_info in automatic_captions.items():
360e1ca5 2666 if lang not in available_subs:
2667 available_subs[lang] = cap_info
2668
4d171848 2669 if (not self.params.get('writesubtitles') and not
2670 self.params.get('writeautomaticsub') or not
2671 available_subs):
2672 return None
a504ced0 2673
d8a58ddc 2674 all_sub_langs = tuple(available_subs.keys())
a504ced0 2675 if self.params.get('allsubtitles', False):
c32b0aab 2676 requested_langs = all_sub_langs
2677 elif self.params.get('subtitleslangs', False):
77c4a9ef 2678 # A list is used so that the order of languages will be the same as
2679 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
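# Each entry is a regex; a leading '-' discards matching languages and 'all'
# selects everything, so e.g. --sub-langs 'all,-live_chat' keeps all but live_chat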
2680 requested_langs = []
2681 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2682 discard = lang_re[0] == '-'
c32b0aab 2683 if discard:
77c4a9ef 2684 lang_re = lang_re[1:]
3aa91540 2685 if lang_re == 'all':
2686 if discard:
2687 requested_langs = []
2688 else:
2689 requested_langs.extend(all_sub_langs)
2690 continue
77c4a9ef 2691 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2692 if discard:
2693 for lang in current_langs:
77c4a9ef 2694 while lang in requested_langs:
2695 requested_langs.remove(lang)
c32b0aab 2696 else:
77c4a9ef 2697 requested_langs.extend(current_langs)
2698 requested_langs = orderedSet(requested_langs)
d8a58ddc 2699 elif normal_sub_langs:
2700 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2701 else:
d8a58ddc 2702 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2703 if requested_langs:
2704 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0 2705
2706 formats_query = self.params.get('subtitlesformat', 'best')
2707 formats_preference = formats_query.split('/') if formats_query else []
2708 subs = {}
2709 for lang in requested_langs:
2710 formats = available_subs.get(lang)
2711 if formats is None:
86e5f3ed 2712 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2713 continue
a504ced0 2714 for ext in formats_preference:
2715 if ext == 'best':
2716 f = formats[-1]
2717 break
2718 matches = list(filter(lambda f: f['ext'] == ext, formats))
2719 if matches:
2720 f = matches[-1]
2721 break
2722 else:
2723 f = formats[-1]
2724 self.report_warning(
2725 'No subtitle format found matching "%s" for language %s, '
2726 'using %s' % (formats_query, lang, f['ext']))
2727 subs[lang] = f
2728 return subs
2729
bb66c247 2730 def _forceprint(self, key, info_dict):
2731 if info_dict is None:
2732 return
2733 info_copy = info_dict.copy()
2734 info_copy['formats_table'] = self.render_formats_table(info_dict)
2735 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2736 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2737 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2738
2739 def format_tmpl(tmpl):
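# A bare field name expands to '%(field)s', while 'field=' expands to
# "field = %(field)r"; anything else is treated as a full output template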
2740 mobj = re.match(r'\w+(=?)$', tmpl)
2741 if mobj and mobj.group(1):
2742 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2743 elif mobj:
2744 return f'%({tmpl})s'
2745 return tmpl
8130779d 2746
bb66c247 2747 for tmpl in self.params['forceprint'].get(key, []):
2748 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2749
2750 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2751 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2752 tmpl = format_tmpl(tmpl)
2753 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2754 if self._ensure_dir_exists(filename):
86e5f3ed 2755 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2756 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2757
d06daf23 2758 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2759 def print_mandatory(field, actual_field=None):
2760 if actual_field is None:
2761 actual_field = field
d06daf23 2762 if (self.params.get('force%s' % field, False)
53c18592 2763 and (not incomplete or info_dict.get(actual_field) is not None)):
2764 self.to_stdout(info_dict[actual_field])
d06daf23 2765
2766 def print_optional(field):
2767 if (self.params.get('force%s' % field, False)
2768 and info_dict.get(field) is not None):
2769 self.to_stdout(info_dict[field])
2770
53c18592 2771 info_dict = info_dict.copy()
2772 if filename is not None:
2773 info_dict['filename'] = filename
2774 if info_dict.get('requested_formats') is not None:
2775 # For RTMP URLs, also include the playpath
2776 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2777 elif info_dict.get('url'):
53c18592 2778 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2779
bb66c247 2780 if (self.params.get('forcejson')
2781 or self.params['forceprint'].get('video')
2782 or self.params['print_to_file'].get('video')):
2b8a2973 2783 self.post_extract(info_dict)
bb66c247 2784 self._forceprint('video', info_dict)
53c18592 2785
d06daf23 2786 print_mandatory('title')
2787 print_mandatory('id')
53c18592 2788 print_mandatory('url', 'urls')
d06daf23 2789 print_optional('thumbnail')
2790 print_optional('description')
53c18592 2791 print_optional('filename')
b868936c 2792 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23 2793 self.to_stdout(formatSeconds(info_dict['duration']))
2794 print_mandatory('format')
53c18592 2795
2b8a2973 2796 if self.params.get('forcejson'):
6e84b215 2797 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2798
e8e73840 2799 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2800 if not info.get('url'):
1151c407 2801 self.raise_no_formats(info, True)
e8e73840 2802
2803 if test:
2804 verbose = self.params.get('verbose')
2805 params = {
2806 'test': True,
a169858f 2807 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2808 'verbose': verbose,
2809 'noprogress': not verbose,
2810 'nopart': True,
2811 'skip_unavailable_fragments': False,
2812 'keep_fragments': False,
2813 'overwrites': True,
2814 '_no_ytdl_file': True,
2815 }
2816 else:
2817 params = self.params
96fccc10 2818 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2819 if not test:
2820 for ph in self._progress_hooks:
2821 fd.add_progress_hook(ph)
42676437 2822 urls = '", "'.join(
2823 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2824 for f in info.get('requested_formats', []) or [info])
18e674b4 2825 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2826
adbc4ec4 2827 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2828 # But it may contain objects that are not deep-copyable
2829 new_info = self._copy_infodict(info)
e8e73840 2830 if new_info.get('http_headers') is None:
2831 new_info['http_headers'] = self._calc_headers(new_info)
2832 return fd.download(name, new_info, subtitle)
2833
e04938ab 2834 def existing_file(self, filepaths, *, default_overwrite=True):
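# Return the first existing candidate when overwriting is disabled; otherwise
# delete all existing candidates and return None so a fresh download happens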
2835 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2836 if existing_files and not self.params.get('overwrites', default_overwrite):
2837 return existing_files[0]
2838
2839 for file in existing_files:
2840 self.report_file_delete(file)
2841 os.remove(file)
2842 return None
2843
8222d8de 2844 def process_info(self, info_dict):
09b49e1f 2845 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de 2846
2847 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2848 original_infodict = info_dict
fd288278 2849
4513a41a 2850 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de 2851 info_dict['format'] = info_dict['ext']
2852
09b49e1f 2853 # This is mostly just for backward compatibility of process_info
2854 # As a side-effect, this allows for format-specific filters
c77495e3 2855 if self._match_entry(info_dict) is not None:
9e907ebd 2856 info_dict['__write_download_archive'] = 'ignore'
8222d8de 2857 return
2858
09b49e1f 2859 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2860 self.post_extract(info_dict)
0c14d66a 2861 self._num_downloads += 1
8222d8de 2862
dcf64d43 2863 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2864 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2865 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2866 files_to_move = {}
8222d8de 2867
2868 # Forced printings
4513a41a 2869 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2870
b7b04c78 2871 if self.params.get('simulate'):
9e907ebd 2872 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
8222d8de 2873 return
2874
de6000d9 2875 if full_filename is None:
8222d8de 2876 return
e92caff5 2877 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2878 return
e92caff5 2879 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de 2880 return
2881
80c03fa9 2882 if self._write_description('video', info_dict,
2883 self.prepare_filename(info_dict, 'description')) is None:
2884 return
2885
2886 sub_files = self._write_subtitles(info_dict, temp_filename)
2887 if sub_files is None:
2888 return
2889 files_to_move.update(dict(sub_files))
2890
2891 thumb_files = self._write_thumbnails(
2892 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2893 if thumb_files is None:
2894 return
2895 files_to_move.update(dict(thumb_files))
8222d8de 2896
80c03fa9 2897 infofn = self.prepare_filename(info_dict, 'infojson')
2898 _infojson_written = self._write_info_json('video', info_dict, infofn)
2899 if _infojson_written:
dac5df5a 2900 info_dict['infojson_filename'] = infofn
e75bb0d6 2901 # For backward compatibility, even though it was a private field
80c03fa9 2902 info_dict['__infojson_filename'] = infofn
2903 elif _infojson_written is None:
2904 return
2905
2906 # Note: Annotations are deprecated
2907 annofn = None
1fb07d10 2908 if self.params.get('writeannotations', False):
de6000d9 2909 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2910 if annofn:
e92caff5 2911 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2912 return
0c3d0f51 2913 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2914 self.to_screen('[info] Video annotations are already present')
ffddb112 2915 elif not info_dict.get('annotations'):
2916 self.report_warning('There are no annotations to write.')
7b6fefc9 2917 else:
2918 try:
6febd1c1 2919 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 2920 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9 2921 annofile.write(info_dict['annotations'])
2922 except (KeyError, TypeError):
6febd1c1 2923 self.report_warning('There are no annotations to write.')
86e5f3ed 2924 except OSError:
6febd1c1 2925 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2926 return
1fb07d10 2927
732044af 2928 # Write internet shortcut files
08438d2c 2929 def _write_link_file(link_type):
60f3e995 2930 url = try_get(info_dict['webpage_url'], iri_to_uri)
2931 if not url:
2932 self.report_warning(
2933 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2934 return True
08438d2c 2935 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a 2936 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2937 return False
10e3742e 2938 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2939 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2940 return True
2941 try:
2942 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 2943 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2944 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2945 template_vars = {'url': url}
08438d2c 2946 if link_type == 'desktop':
2947 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2948 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 2949 except OSError:
08438d2c 2950 self.report_error(f'Cannot write internet shortcut {linkfn}')
2951 return False
732044af 2952 return True
2953
08438d2c 2954 write_links = {
2955 'url': self.params.get('writeurllink'),
2956 'webloc': self.params.get('writewebloclink'),
2957 'desktop': self.params.get('writedesktoplink'),
2958 }
2959 if self.params.get('writelink'):
2960 link_type = ('webloc' if sys.platform == 'darwin'
2961 else 'desktop' if sys.platform.startswith('linux')
2962 else 'url')
2963 write_links[link_type] = True
2964
2965 if any(should_write and not _write_link_file(link_type)
2966 for link_type, should_write in write_links.items()):
2967 return
732044af 2968
f46e2f9d 2969 def replace_info_dict(new_info):
2970 nonlocal info_dict
2971 if new_info == info_dict:
2972 return
2973 info_dict.clear()
2974 info_dict.update(new_info)
2975
56d868db 2976 try:
f46e2f9d 2977 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2978 replace_info_dict(new_info)
56d868db 2979 except PostProcessingError as err:
2980 self.report_error('Preprocessing: %s' % str(err))
2981 return
2982
a13e6848 2983 if self.params.get('skip_download'):
56d868db 2984 info_dict['filepath'] = temp_filename
2985 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2986 info_dict['__files_to_move'] = files_to_move
f46e2f9d 2987 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 2988 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 2989 else:
2990 # Download
b868936c 2991 info_dict.setdefault('__postprocessors', [])
4340deca 2992 try:
0202b52a 2993
e04938ab 2994 def existing_video_file(*filepaths):
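# Check the post-conversion name of each candidate (using final_ext, e.g. as set
# by --remux-video) before the unconverted name, and adopt the found file's extension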
6b591b29 2995 ext = info_dict.get('ext')
e04938ab 2996 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2997 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2998 default_overwrite=False)
2999 if file:
3000 info_dict['ext'] = os.path.splitext(file)[1][1:]
3001 return file
0202b52a 3002
3003 success = True
4340deca 3004 if info_dict.get('requested_formats') is not None:
81cd954a 3005
3006 def compatible_formats(formats):
d03cfdce 3007 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3008 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3009 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3010 if len(video_formats) > 2 or len(audio_formats) > 2:
3011 return False
3012
81cd954a 3013 # Check extension
86e5f3ed 3014 exts = {format.get('ext') for format in formats}
d03cfdce 3015 COMPATIBLE_EXTS = (
86e5f3ed 3016 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3017 {'webm'},
d03cfdce 3018 )
3019 for ext_sets in COMPATIBLE_EXTS:
3020 if ext_sets.issuperset(exts):
3021 return True
81cd954a 3022 # TODO: Check acodec/vcodec
3023 return False
3024
3025 requested_formats = info_dict['requested_formats']
0202b52a 3026 old_ext = info_dict['ext']
4e3b637d 3027 if self.params.get('merge_output_format') is None:
3028 if not compatible_formats(requested_formats):
3029 info_dict['ext'] = 'mkv'
3030 self.report_warning(
3031 'Requested formats are incompatible for merge and will be merged into mkv')
3032 if (info_dict['ext'] == 'webm'
3033 and info_dict.get('thumbnails')
3034 # check with type instead of pp_key, __name__, or isinstance
3035 # since we don't want any custom PPs to trigger this
3036 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3037 info_dict['ext'] = 'mkv'
3038 self.report_warning(
3039 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3040 new_ext = info_dict['ext']
0202b52a 3041
124bc071 3042 def correct_ext(filename, ext=new_ext):
96fccc10 3043 if filename == '-':
3044 return filename
0202b52a 3045 filename_real_ext = os.path.splitext(filename)[1][1:]
3046 filename_wo_ext = (
3047 os.path.splitext(filename)[0]
124bc071 3048 if filename_real_ext in (old_ext, new_ext)
0202b52a 3049 else filename)
86e5f3ed 3050 return f'{filename_wo_ext}.{ext}'
0202b52a 3051
38c6902b 3052 # Ensure filename always has a correct extension for successful merge
0202b52a 3053 full_filename = correct_ext(full_filename)
3054 temp_filename = correct_ext(temp_filename)
e04938ab 3055 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3056 info_dict['__real_download'] = False
18e674b4 3057
adbc4ec4 3058 downloaded = []
3059 merger = FFmpegMergerPP(self)
3060
3061 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 3062 if dl_filename is not None:
6c7274ec 3063 self.report_file_already_downloaded(dl_filename)
adbc4ec4 3064 elif fd:
3065 for f in requested_formats if fd != FFmpegFD else []:
3066 f['filepath'] = fname = prepend_extension(
3067 correct_ext(temp_filename, info_dict['ext']),
3068 'f%s' % f['format_id'], info_dict['ext'])
3069 downloaded.append(fname)
dbf5416a 3070 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3071 success, real_download = self.dl(temp_filename, info_dict)
3072 info_dict['__real_download'] = real_download
18e674b4 3073 else:
18e674b4 3074 if self.params.get('allow_unplayable_formats'):
3075 self.report_warning(
3076 'You have requested merging of multiple formats '
3077 'while also allowing unplayable formats to be downloaded. '
3078 'The formats won\'t be merged to prevent data corruption.')
3079 elif not merger.available:
e8969bda 3080 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3081 if not self.params.get('ignoreerrors'):
3082 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3083 return
3084 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3085
96fccc10 3086 if temp_filename == '-':
adbc4ec4 3087 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3088 else 'but the formats are incompatible for simultaneous download' if merger.available
3089 else 'but ffmpeg is not installed')
3090 self.report_warning(
3091 f'You have requested downloading multiple formats to stdout {reason}. '
3092 'The formats will be streamed one after the other')
3093 fname = temp_filename
dbf5416a 3094 for f in requested_formats:
3095 new_info = dict(info_dict)
3096 del new_info['requested_formats']
3097 new_info.update(f)
96fccc10 3098 if temp_filename != '-':
124bc071 3099 fname = prepend_extension(
3100 correct_ext(temp_filename, new_info['ext']),
3101 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3102 if not self._ensure_dir_exists(fname):
3103 return
a21e0ab1 3104 f['filepath'] = fname
96fccc10 3105 downloaded.append(fname)
dbf5416a 3106 partial_success, real_download = self.dl(fname, new_info)
3107 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3108 success = success and partial_success
adbc4ec4 3109
3110 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3111 info_dict['__postprocessors'].append(merger)
3112 info_dict['__files_to_merge'] = downloaded
3113 # Even if there were no new downloads, the merge itself is only happening now
3114 info_dict['__real_download'] = True
3115 else:
3116 for file in downloaded:
3117 files_to_move[file] = None
4340deca 3118 else:
3119 # Just a single file
e04938ab 3120 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3121 if dl_filename is None or dl_filename == temp_filename:
3122 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3123 # So we should try to resume the download
e8e73840 3124 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3125 info_dict['__real_download'] = real_download
6c7274ec 3126 else:
3127 self.report_file_already_downloaded(dl_filename)
0202b52a 3128
0202b52a 3129 dl_filename = dl_filename or temp_filename
c571435f 3130 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3131
3158150c 3132 except network_exceptions as err:
7960b056 3133 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3134 return
86e5f3ed 3135 except OSError as err:
4340deca 3136 raise UnavailableVideoError(err)
3137 except (ContentTooShortError, ) as err:
86e5f3ed 3138 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3139 return
8222d8de 3140
de6000d9 3141 if success and full_filename != '-':
f17f8651 3142
fd7cfb64 3143 def fixup():
3144 do_fixup = True
3145 fixup_policy = self.params.get('fixup')
3146 vid = info_dict['id']
3147
3148 if fixup_policy in ('ignore', 'never'):
3149 return
3150 elif fixup_policy == 'warn':
3151 do_fixup = False
f89b3e2d 3152 elif fixup_policy != 'force':
3153 assert fixup_policy in ('detect_or_warn', None)
3154 if not info_dict.get('__real_download'):
3155 do_fixup = False
fd7cfb64 3156
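# Queue an ffmpeg-based fixup postprocessor when the condition holds; if fixups
# are disabled or ffmpeg is unavailable, only a warning is emitted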
3157 def ffmpeg_fixup(cndn, msg, cls):
3158 if not cndn:
3159 return
3160 if not do_fixup:
3161 self.report_warning(f'{vid}: {msg}')
3162 return
3163 pp = cls(self)
3164 if pp.available:
3165 info_dict['__postprocessors'].append(pp)
3166 else:
3167 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3168
3169 stretched_ratio = info_dict.get('stretched_ratio')
3170 ffmpeg_fixup(
3171 stretched_ratio not in (1, None),
3172 f'Non-uniform pixel ratio {stretched_ratio}',
3173 FFmpegFixupStretchedPP)
3174
3175 ffmpeg_fixup(
3176 (info_dict.get('requested_formats') is None
3177 and info_dict.get('container') == 'm4a_dash'
3178 and info_dict.get('ext') == 'm4a'),
3179 'writing DASH m4a. Only some players support this container',
3180 FFmpegFixupM4aPP)
3181
993191c0 3182 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3183 downloader = downloader.__name__ if downloader else None
adbc4ec4 3184
3185 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3186 ffmpeg_fixup(downloader == 'HlsFD',
3187 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3188 FFmpegFixupM3u8PP)
3189 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3190 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3191
e04b003e 3192 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3193 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3194
3195 fixup()
8222d8de 3196 try:
f46e2f9d 3197 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3198 except PostProcessingError as err:
3199 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3200 return
ab8e5e51 3201 try:
3202 for ph in self._post_hooks:
23c1a667 3203 ph(info_dict['filepath'])
ab8e5e51 3204 except Exception as err:
3205 self.report_error('post hooks: %s' % str(err))
3206 return
9e907ebd 3207 info_dict['__write_download_archive'] = True
2d30509f 3208
a13e6848 3209 if self.params.get('force_write_download_archive'):
9e907ebd 3210 info_dict['__write_download_archive'] = True
a13e6848 3211
3212 # Make sure the info_dict was modified in-place
f46e2f9d 3213 assert info_dict is original_infodict
a13e6848 3214
c3e6ffba 3215 max_downloads = self.params.get('max_downloads')
3216 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3217 raise MaxDownloadsReached()
8222d8de 3218
aa9369a2 3219 def __download_wrapper(self, func):
3220 @functools.wraps(func)
3221 def wrapper(*args, **kwargs):
3222 try:
3223 res = func(*args, **kwargs)
3224 except UnavailableVideoError as e:
3225 self.report_error(e)
b222c271 3226 except MaxDownloadsReached as e:
aa9369a2 3227 self.to_screen(f'[info] {e}')
3228 raise
b222c271 3229 except DownloadCancelled as e:
3230 self.to_screen(f'[info] {e}')
3231 if not self.params.get('break_per_url'):
3232 raise
aa9369a2 3233 else:
3234 if self.params.get('dump_single_json', False):
3235 self.post_extract(res)
3236 self.to_stdout(json.dumps(self.sanitize_info(res)))
3237 return wrapper
3238
8222d8de 3239 def download(self, url_list):
3240 """Download a given list of URLs."""
aa9369a2 3241 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3242 outtmpl = self.outtmpl_dict['default']
3089bc74 3243 if (len(url_list) > 1
3244 and outtmpl != '-'
3245 and '%' not in outtmpl
3246 and self.params.get('max_downloads') != 1):
acd69589 3247 raise SameFileError(outtmpl)
8222d8de 3248
3249 for url in url_list:
aa9369a2 3250 self.__download_wrapper(self.extract_info)(
3251 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 3252
3253 return self._download_retcode
3254
1dcc4c0c 3255 def download_with_info_file(self, info_filename):
31bd3925 3256 with contextlib.closing(fileinput.FileInput(
3257 [info_filename], mode='r',
3258 openhook=fileinput.hook_encoded('utf-8'))) as f:
3259 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3260 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3261 try:
aa9369a2 3262 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3263 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3264 if not isinstance(e, EntryNotInPlaylist):
3265 self.to_stderr('\r')
d4943898 3266 webpage_url = info.get('webpage_url')
3267 if webpage_url is not None:
aa9369a2 3268 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898 3269 return self.download([webpage_url])
3270 else:
3271 raise
3272 return self._download_retcode
1dcc4c0c 3273
cb202fd2 3274 @staticmethod
8012d892 3275 def sanitize_info(info_dict, remove_private_keys=False):
3276 ''' Sanitize the infodict for converting to json '''
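# Values that are not JSON-serializable are replaced by their repr(); with
# remove_private_keys, internal fields ('__*', 'requested_*', 'filepath', ...) are dropped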
3ad56b42 3277 if info_dict is None:
3278 return info_dict
6e84b215 3279 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3280 info_dict.setdefault('_type', 'video')
09b49e1f 3281
8012d892 3282 if remove_private_keys:
0a5a191a 3283 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3284 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3285 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3286 }
ae8f99e6 3287 else:
09b49e1f 3288 reject = lambda k, v: False
adbc4ec4 3289
3290 def filter_fn(obj):
3291 if isinstance(obj, dict):
3292 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3293 elif isinstance(obj, (list, tuple, set, LazyList)):
3294 return list(map(filter_fn, obj))
3295 elif obj is None or isinstance(obj, (str, int, float, bool)):
3296 return obj
3297 else:
3298 return repr(obj)
3299
5226731e 3300 return filter_fn(info_dict)
cb202fd2 3301
8012d892 3302 @staticmethod
3303 def filter_requested_info(info_dict, actually_filter=True):
3304 ''' Alias of sanitize_info for backward compatibility '''
3305 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3306
ed5835b4 3307 @staticmethod
3308 def post_extract(info_dict):
3309 def actual_post_extract(info_dict):
3310 if info_dict.get('_type') in ('playlist', 'multi_video'):
3311 for video_dict in info_dict.get('entries', {}):
3312 actual_post_extract(video_dict or {})
3313 return
3314
09b49e1f 3315 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3316 info_dict.update(post_extractor())
ed5835b4 3317
3318 actual_post_extract(info_dict or {})
3319
dcf64d43 3320 def run_pp(self, pp, infodict):
5bfa4862 3321 files_to_delete = []
dcf64d43 3322 if '__files_to_move' not in infodict:
3323 infodict['__files_to_move'] = {}
b1940459 3324 try:
3325 files_to_delete, infodict = pp.run(infodict)
3326 except PostProcessingError as e:
3327 # Must be True and not 'only_download'
3328 if self.params.get('ignoreerrors') is True:
3329 self.report_error(e)
3330 return infodict
3331 raise
3332
5bfa4862 3333 if not files_to_delete:
dcf64d43 3334 return infodict
5bfa4862 3335 if self.params.get('keepvideo', False):
3336 for f in files_to_delete:
dcf64d43 3337 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3338 else:
3339 for old_filename in set(files_to_delete):
3340 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3341 try:
3342 os.remove(encodeFilename(old_filename))
86e5f3ed 3343 except OSError:
5bfa4862 3344 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3345 if old_filename in infodict['__files_to_move']:
3346 del infodict['__files_to_move'][old_filename]
3347 return infodict
5bfa4862 3348
ed5835b4 3349 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3350 self._forceprint(key, info)
ed5835b4 3351 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3352 info = self.run_pp(pp, info)
ed5835b4 3353 return info
277d6ff5 3354
56d868db 3355 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3356 info = dict(ie_info)
56d868db 3357 info['__files_to_move'] = files_to_move or {}
ed5835b4 3358 info = self.run_all_pps(key, info)
56d868db 3359 return info, info.pop('__files_to_move', None)
5bfa4862 3360
f46e2f9d 3361 def post_process(self, filename, info, files_to_move=None):
8222d8de 3362 """Run all the postprocessors on the given file."""
8222d8de 3363 info['filepath'] = filename
dcf64d43 3364 info['__files_to_move'] = files_to_move or {}
ed5835b4 3365 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3366 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3367 del info['__files_to_move']
ed5835b4 3368 return self.run_all_pps('after_move', info)
c1c9a79c 3369
5db07df6 3370 def _make_archive_id(self, info_dict):
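# Archive IDs have the form '<extractor key in lowercase> <video id>',
# e.g. 'youtube dQw4w9WgXcQ' (illustrative)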
e9fef7ee 3371 video_id = info_dict.get('id')
3372 if not video_id:
3373 return
5db07df6 3374 # Future-proof against any change in case
3375 # and for backwards compatibility with prior versions
e9fef7ee 3376 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3377 if extractor is None:
1211bb6d
S
3378 url = str_or_none(info_dict.get('url'))
3379 if not url:
3380 return
e9fef7ee 3381 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3382 for ie_key, ie in self._ies.items():
1211bb6d 3383 if ie.suitable(url):
8b7491c8 3384 extractor = ie_key
e9fef7ee 3385 break
3386 else:
3387 return
86e5f3ed 3388 return f'{extractor.lower()} {video_id}'
5db07df6 3389
3390 def in_download_archive(self, info_dict):
3391 fn = self.params.get('download_archive')
3392 if fn is None:
3393 return False
3394
3395 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3396 if not vid_id:
7012b23c 3397 return False # Incomplete video information
5db07df6 3398
a45e8619 3399 return vid_id in self.archive
c1c9a79c 3400
3401 def record_download_archive(self, info_dict):
3402 fn = self.params.get('download_archive')
3403 if fn is None:
3404 return
5db07df6 3405 vid_id = self._make_archive_id(info_dict)
3406 assert vid_id
a13e6848 3407 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3408 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3409 archive_file.write(vid_id + '\n')
a45e8619 3410 self.archive.add(vid_id)
dd82ffea 3411
8c51aa65 3412 @staticmethod
8abeeb94 3413 def format_resolution(format, default='unknown'):
9359f3d4 3414 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3415 return 'audio only'
f49d89ee 3416 if format.get('resolution') is not None:
3417 return format['resolution']
35615307 3418 if format.get('width') and format.get('height'):
ff51ed58 3419 return '%dx%d' % (format['width'], format['height'])
35615307 3420 elif format.get('height'):
ff51ed58 3421 return '%sp' % format['height']
35615307 3422 elif format.get('width'):
ff51ed58 3423 return '%dx?' % format['width']
3424 return default
8c51aa65 3425
8130779d 3426 def _list_format_headers(self, *headers):
3427 if self.params.get('listformats_table', True) is not False:
3428 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3429 return headers
3430
c57f7757 3431 def _format_note(self, fdict):
3432 res = ''
3433 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3434 res += '(unsupported)'
32f90364 3435 if fdict.get('language'):
3436 if res:
3437 res += ' '
f304da8a 3438 res += '[%s]' % fdict['language']
c57f7757 3439 if fdict.get('format_note') is not None:
f304da8a 3440 if res:
3441 res += ' '
3442 res += fdict['format_note']
c57f7757 3443 if fdict.get('tbr') is not None:
f304da8a 3444 if res:
3445 res += ', '
3446 res += '%4dk' % fdict['tbr']
c57f7757 3447 if fdict.get('container') is not None:
3448 if res:
3449 res += ', '
3450 res += '%s container' % fdict['container']
3089bc74 3451 if (fdict.get('vcodec') is not None
3452 and fdict.get('vcodec') != 'none'):
c57f7757 3453 if res:
3454 res += ', '
3455 res += fdict['vcodec']
91c7271a 3456 if fdict.get('vbr') is not None:
c57f7757 3457 res += '@'
3458 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3459 res += 'video@'
3460 if fdict.get('vbr') is not None:
3461 res += '%4dk' % fdict['vbr']
fbb21cf5 3462 if fdict.get('fps') is not None:
5d583bdf 3463 if res:
3464 res += ', '
3465 res += '%sfps' % fdict['fps']
c57f7757 3466 if fdict.get('acodec') is not None:
3467 if res:
3468 res += ', '
3469 if fdict['acodec'] == 'none':
3470 res += 'video only'
3471 else:
3472 res += '%-5s' % fdict['acodec']
3473 elif fdict.get('abr') is not None:
3474 if res:
3475 res += ', '
3476 res += 'audio'
3477 if fdict.get('abr') is not None:
3478 res += '@%3dk' % fdict['abr']
3479 if fdict.get('asr') is not None:
3480 res += ' (%5dHz)' % fdict['asr']
3481 if fdict.get('filesize') is not None:
3482 if res:
3483 res += ', '
3484 res += format_bytes(fdict['filesize'])
9732d77e 3485 elif fdict.get('filesize_approx') is not None:
3486 if res:
3487 res += ', '
3488 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3489 return res
91c7271a 3490
8130779d 3491 def render_formats_table(self, info_dict):
b69fd25c 3492 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3493 return None
b69fd25c 3494
94badb25 3495 formats = info_dict.get('formats', [info_dict])
8130779d 3496 if self.params.get('listformats_table', True) is False:
76d321f6 3497 table = [
3498 [
3499 format_field(f, 'format_id'),
3500 format_field(f, 'ext'),
3501 self.format_resolution(f),
8130779d 3502 self._format_note(f)
3503 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3504 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3505
3506 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3507 table = [
3508 [
3509 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3510 format_field(f, 'ext'),
3511 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3512 format_field(f, 'fps', '\t%d'),
3513 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3514 delim,
3515 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3516 format_field(f, 'tbr', '\t%dk'),
3517 shorten_protocol_name(f.get('protocol', '')),
3518 delim,
3519 format_field(f, 'vcodec', default='unknown').replace(
3520 'none', 'images' if f.get('acodec') == 'none'
3521 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3522 format_field(f, 'vbr', '\t%dk'),
3523 format_field(f, 'acodec', default='unknown').replace(
3524 'none', '' if f.get('vcodec') == 'none'
3525 else self._format_screen('video only', self.Styles.SUPPRESS)),
3526 format_field(f, 'abr', '\t%dk'),
3527 format_field(f, 'asr', '\t%dHz'),
3528 join_nonempty(
3529 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3530 format_field(f, 'language', '[%s]'),
3531 join_nonempty(format_field(f, 'format_note'),
3532 format_field(f, 'container', ignore=(None, f.get('ext'))),
3533 delim=', '),
3534 delim=' '),
3535 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3536 header_line = self._list_format_headers(
3537 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3538 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3539
3540 return render_table(
3541 header_line, table, hide_empty=True,
3542 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3543
3544 def render_thumbnails_table(self, info_dict):
88f23a18 3545 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3546 if not thumbnails:
8130779d 3547 return None
3548 return render_table(
ec11a9f4 3549 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3550 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3551
8130779d 3552 def render_subtitles_table(self, video_id, subtitles):
2412044c 3553 def _row(lang, formats):
49c258e1 3554 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3555 if len(set(names)) == 1:
7aee40c1 3556 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3557 return [lang, ', '.join(names), ', '.join(exts)]
3558
8130779d 3559 if not subtitles:
3560 return None
3561 return render_table(
ec11a9f4 3562 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3563 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3564 hide_empty=True)
3565
3566 def __list_table(self, video_id, name, func, *args):
3567 table = func(*args)
3568 if not table:
3569 self.to_screen(f'{video_id} has no {name}')
3570 return
3571 self.to_screen(f'[info] Available {name} for {video_id}:')
3572 self.to_stdout(table)
3573
3574 def list_formats(self, info_dict):
3575 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3576
3577 def list_thumbnails(self, info_dict):
3578 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3579
3580 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3581 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3582
dca08720 3583 def urlopen(self, req):
3584 """ Start an HTTP download """
f9934b96 3585 if isinstance(req, str):
67dda517 3586 req = sanitized_Request(req)
19a41fc6 3587 return self._opener.open(req, timeout=self._socket_timeout)
dca08720 3588
3589 def print_debug_header(self):
3590 if not self.params.get('verbose'):
3591 return
49a57e70 3592
3593 def get_encoding(stream):
2a938746 3594 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3595 if not supports_terminal_sequences(stream):
59f943cd 3596 from .compat import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3597 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3598 return ret
3599
3600 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3601 locale.getpreferredencoding(),
3602 sys.getfilesystemencoding(),
cf4f42cb 3603 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
49a57e70 3604 self.get_encoding())
883d4b1e 3605
3606 logger = self.params.get('logger')
3607 if logger:
3608 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3609 write_debug(encoding_str)
3610 else:
96565c7e 3611 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3612 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3613
4c88ff87 3614 source = detect_variant()
36eaf303 3615 write_debug(join_nonempty(
3616 'yt-dlp version', __version__,
3617 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3618 '' if source == 'unknown' else f'({source})',
3619 delim=' '))
6e21fdd2 3620 if not _LAZY_LOADER:
3621 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3622 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3623 else:
49a57e70 3624 write_debug('Lazy loading extractors is disabled')
3ae5e797 3625 if plugin_extractors or plugin_postprocessors:
49a57e70 3626 write_debug('Plugins: %s' % [
3ae5e797 3627 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3628 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3629 if self.params.get('compat_opts'):
49a57e70 3630 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3631
3632 if source == 'source':
dca08720 3633 try:
36eaf303 3634 sp = Popen(
3635 ['git', 'rev-parse', '--short', 'HEAD'],
3636 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3637 cwd=os.path.dirname(os.path.abspath(__file__)))
3638 out, err = sp.communicate_or_kill()
3639 out = out.decode().strip()
3640 if re.match('[0-9a-f]+', out):
3641 write_debug('Git HEAD: %s' % out)
70a1165b 3642 except Exception:
19a03940 3643 with contextlib.suppress(Exception):
36eaf303 3644 sys.exc_clear()
b300cda4 3645
3646 def python_implementation():
3647 impl_name = platform.python_implementation()
3648 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3649 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3650 return impl_name
3651
49a57e70 3652 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3653 platform.python_version(),
3654 python_implementation(),
3655 platform.architecture()[0],
b300cda4 3656 platform_name()))
d28b5171 3657
8913ef74 3658 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3659 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3660 if ffmpeg_features:
19a03940 3661 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3662
4c83c967 3663 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3664 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3665 exe_str = ', '.join(
2831b468 3666 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3667 ) or 'none'
49a57e70 3668 write_debug('exe versions: %s' % exe_str)
dca08720 3669
9b8ee23b 3670 from .dependencies import available_dependencies
3671
3672 write_debug('Optional libraries: %s' % (', '.join(sorted({
3673 module.__name__.split('.')[0] for module in available_dependencies.values()
3674 })) or 'none'))
2831b468 3675
97ec5bc5 3676 self._setup_opener()
dca08720
PH
3677 proxy_map = {}
3678 for handler in self._opener.handlers:
3679 if hasattr(handler, 'proxies'):
3680 proxy_map.update(handler.proxies)
49a57e70 3681 write_debug(f'Proxy map: {proxy_map}')
dca08720 3682
49a57e70 3683 # Not implemented
3684 if False and self.params.get('call_home'):
58b1f00d 3685 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3686 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3687 latest_version = self.urlopen(
3688 'https://yt-dl.org/latest/version').read().decode('utf-8')
3689 if version_tuple(latest_version) > version_tuple(__version__):
3690 self.report_warning(
3691 'You are using an outdated version (newest version: %s)! '
3692 'See https://yt-dl.org/update if you need help updating.' %
3693 latest_version)
3694
e344693b 3695 def _setup_opener(self):
97ec5bc5 3696 if hasattr(self, '_opener'):
3697 return
6ad14cab 3698 timeout_val = self.params.get('socket_timeout')
17bddf3e 3699 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3700
982ee69a 3701 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720 3702 opts_cookiefile = self.params.get('cookiefile')
3703 opts_proxy = self.params.get('proxy')
3704
982ee69a 3705 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3706
6a3f4c3f 3707 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720 3708 if opts_proxy is not None:
3709 if opts_proxy == '':
3710 proxies = {}
3711 else:
3712 proxies = {'http': opts_proxy, 'https': opts_proxy}
3713 else:
3714 proxies = compat_urllib_request.getproxies()
067aa17e 3715 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720 3716 if 'http' in proxies and 'https' not in proxies:
3717 proxies['https'] = proxies['http']
91410c9b 3718 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2 3719
3720 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d 3721 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3722 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3723 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3724 data_handler = urllib.request.DataHandler()
6240b0a2 3725
3726 # When passing our own FileHandler instance, build_opener won't add the
3727 # default FileHandler, which allows us to disable the file protocol that
3728 # can be used for malicious purposes (see
067aa17e 3729 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2 3730 file_handler = compat_urllib_request.FileHandler()
3731
3732 def file_open(*args, **kwargs):
7a5c1cfe 3733 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2 3734 file_handler.file_open = file_open
3735
3736 opener = compat_urllib_request.build_opener(
fca6dba8 3737 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3738
dca08720 3739 # Delete the default user-agent header, which would otherwise apply in
3740 # cases where our custom HTTP handler doesn't come into play
067aa17e 3741 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720 3742 opener.addheaders = []
3743 self._opener = opener
62fec3b2 3744
3745 def encode(self, s):
3746 if isinstance(s, bytes):
3747 return s # Already encoded
3748
3749 try:
3750 return s.encode(self.get_encoding())
3751 except UnicodeEncodeError as err:
3752 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3753 raise
3754
3755 def get_encoding(self):
3756 encoding = self.params.get('encoding')
3757 if encoding is None:
3758 encoding = preferredencoding()
3759 return encoding
ec82d85a 3760
e08a85d8 3761 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3762 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
e08a85d8 3763 if overwrite is None:
3764 overwrite = self.params.get('overwrites', True)
80c03fa9 3765 if not self.params.get('writeinfojson'):
3766 return False
3767 elif not infofn:
3768 self.write_debug(f'Skipping writing {label} infojson')
3769 return False
3770 elif not self._ensure_dir_exists(infofn):
3771 return None
e08a85d8 3772 elif not overwrite and os.path.exists(infofn):
80c03fa9 3773 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3774 return 'exists'
3775
3776 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3777 try:
3778 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3779 return True
86e5f3ed 3780 except OSError:
cb96c5be 3781 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3782 return None
80c03fa9 3783
3784 def _write_description(self, label, ie_result, descfn):
3785 ''' Write description and return True = written, False = skip, None = error '''
3786 if not self.params.get('writedescription'):
3787 return False
3788 elif not descfn:
3789 self.write_debug(f'Skipping writing {label} description')
3790 return False
3791 elif not self._ensure_dir_exists(descfn):
3792 return None
3793 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3794 self.to_screen(f'[info] {label.title()} description is already present')
3795 elif ie_result.get('description') is None:
3796 self.report_warning(f'There\'s no {label} description to write')
3797 return False
3798 else:
3799 try:
3800 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3801 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3802 descfile.write(ie_result['description'])
86e5f3ed 3803 except OSError:
80c03fa9 3804 self.report_error(f'Cannot write {label} description file {descfn}')
3805 return None
3806 return True
3807
3808 def _write_subtitles(self, info_dict, filename):
3809 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3810 ret = []
3811 subtitles = info_dict.get('requested_subtitles')
3812 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3813 # subtitle download errors are already handled as problems in the relevant IE;
3814 # that way, it silently continues when used with an IE that doesn't support subtitles
3815 return ret
3816
3817 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3818 if not sub_filename_base:
3819 self.to_screen('[info] Skipping writing video subtitles')
3820 return ret
3821 for sub_lang, sub_info in subtitles.items():
3822 sub_format = sub_info['ext']
3823 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3824 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3825 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3826 if existing_sub:
80c03fa9 3827 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3828 sub_info['filepath'] = existing_sub
3829 ret.append((existing_sub, sub_filename_final))
80c03fa9 3830 continue
3831
3832 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3833 if sub_info.get('data') is not None:
3834 try:
3835 # Use newline='' to prevent conversion of newline characters
3836 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3837 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3838 subfile.write(sub_info['data'])
3839 sub_info['filepath'] = sub_filename
3840 ret.append((sub_filename, sub_filename_final))
3841 continue
86e5f3ed 3842 except OSError:
80c03fa9 3843 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3844 return None
3845
3846 try:
3847 sub_copy = sub_info.copy()
3848 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3849 self.dl(sub_filename, sub_copy, subtitle=True)
3850 sub_info['filepath'] = sub_filename
3851 ret.append((sub_filename, sub_filename_final))
6020e05d 3852 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3853 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3854 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3855 if not self.params.get('ignoreerrors'):
3856 self.report_error(msg)
3857 raise DownloadError(msg)
3858 self.report_warning(msg)
519804a9 3859 return ret
80c03fa9 3860
3861 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3862 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3863 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3864 thumbnails, ret = [], []
6c4fd172 3865 if write_all or self.params.get('writethumbnail', False):
0202b52a 3866 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3867 multiple = write_all and len(thumbnails) > 1
ec82d85a 3868
80c03fa9 3869 if thumb_filename_base is None:
3870 thumb_filename_base = filename
3871 if thumbnails and not thumb_filename_base:
3872 self.write_debug(f'Skipping writing {label} thumbnail')
3873 return ret
3874
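# Iterate from the last (highest-preference) thumbnail downwards; failed downloads
# are popped from the list, and unless --write-all-thumbnails is set, stop after the first success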
dd0228ce 3875 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3876 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3877 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3878 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3879 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3880
e04938ab 3881 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3882 if existing_thumb:
aa9369a2 3883 self.to_screen('[info] %s is already present' % (
3884 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3885 t['filepath'] = existing_thumb
3886 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3887 else:
80c03fa9 3888 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3889 try:
297e9952 3890 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3891 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3892 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3893 shutil.copyfileobj(uf, thumbf)
80c03fa9 3894 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3895 t['filepath'] = thumb_filename
3158150c 3896 except network_exceptions as err:
dd0228ce 3897 thumbnails.pop(idx)
80c03fa9 3898 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3899 if ret and not write_all:
3900 break
0202b52a 3901 return ret