#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from enum import Enum
from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DownloadCancelled,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    format_decimal_suffix,
    formatSeconds,
    GeoRestrictedError,
    get_domain,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    join_nonempty,
    LazyList,
    LINK_TEMPLATES,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    number_of_digits,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    Popen,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    ReExtractInfo,
    register_socks_protocols,
    RejectedVideoReached,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .minicurses import format_text
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__, RELEASE_GIT_HEAD

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors handle),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

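    A minimal usage sketch (the option values here are only illustrative,
    not defaults):

        with YoutubeDL({'format': 'best', 'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download([url])  # url: any URL handled by some InfoExtractor
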
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
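                       A rough sketch of such a function (this assumes the
                       formats in ctx are listed from worst to best):
                           def format_selector(ctx):
                               yield ctx['formats'][-1]  # just the last (best) format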
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
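                       Eg. (both the paths and the template here are arbitrary
                       illustrations):
                       'paths': {'home': '~/videos', 'temp': '/tmp/yt-dlp'},
                       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'}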
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well (deprecated)
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                         pre_process|before_dl|post_process|after_move.
                         Assumed to be 'post_process' if not given
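                       Eg. a single entry that extracts audio via ffmpeg
                       (assuming the FFmpegExtractAudio postprocessor and its
                       'preferredcodec' option are available):
                           {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}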
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
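                       A minimal sketch of such a hook (the fields used are
                       among the entries described above):
                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading', d['filename'])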
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
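                       A minimal sketch of such a filter (the 10-minute cutoff
                       is an arbitrary example):
                           def match_filter(info_dict, *, incomplete=False):
                               if (info_dict.get('duration') or 0) > 600:
                                   return 'Skipping videos longer than 10 minutes'
                               return None  # download the video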
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
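                       Eg. (assuming the aria2c and ffmpeg executables are
                       installed): {'dash': 'aria2c', 'm3u8': 'ffmpeg'}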
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
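                       Eg. (purely illustrative; field names follow the 'info'
                       dict and the progress dict passed to progress_hooks):
                       {'download-title': '%(info.id)s - ETA %(progress.eta)s'}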

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
    noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        self._allow_colors = {
            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
                when=when)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance it will create a new one
        and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    class Styles(Enum):
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        if test_encoding:
            original_text = text
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        if isinstance(f, self.Styles):
            f = f.value
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_screen(self, *args, **kwargs):
        return self._format_text(
            self._screen_file, self._allow_colors['screen'], *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(
            self._err_file, self._allow_colors['err'], *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDF]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDF]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
            elif fmt[-1] == 'F':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

120fe513 1239 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1240 """ Returns None if the file should be downloaded """
8222d8de 1241
c77495e3 1242 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1243
8b0d7497 1244 def check_filter():
8b0d7497 1245 if 'title' in info_dict:
1246 # This can happen when we're just evaluating the playlist
1247 title = info_dict['title']
1248 matchtitle = self.params.get('matchtitle', False)
1249 if matchtitle:
1250 if not re.search(matchtitle, title, re.IGNORECASE):
1251 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1252 rejecttitle = self.params.get('rejecttitle', False)
1253 if rejecttitle:
1254 if re.search(rejecttitle, title, re.IGNORECASE):
1255 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1256 date = info_dict.get('upload_date')
1257 if date is not None:
1258 dateRange = self.params.get('daterange', DateRange())
1259 if date not in dateRange:
1260 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1261 view_count = info_dict.get('view_count')
1262 if view_count is not None:
1263 min_views = self.params.get('min_views')
1264 if min_views is not None and view_count < min_views:
1265 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1266 max_views = self.params.get('max_views')
1267 if max_views is not None and view_count > max_views:
1268 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1269 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1270 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1271
8f18aca8 1272 match_filter = self.params.get('match_filter')
1273 if match_filter is not None:
1274 try:
1275 ret = match_filter(info_dict, incomplete=incomplete)
1276 except TypeError:
1277 # For backward compatibility
1278 ret = None if incomplete else match_filter(info_dict)
1279 if ret is not None:
1280 return ret
8b0d7497 1281 return None
1282
c77495e3 1283 if self.in_download_archive(info_dict):
1284 reason = '%s has already been recorded in the archive' % video_title
1285 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1286 else:
1287 reason = check_filter()
1288 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1289 if reason is not None:
120fe513 1290 if not silent:
1291 self.to_screen('[download] ' + reason)
c77495e3 1292 if self.params.get(break_opt, False):
1293 raise break_err()
8b0d7497 1294 return reason
fe7e0c98 1295
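# A minimal sketch of a match_filter callable as used by _match_entry above (names and
# threshold are illustrative). Returning a string rejects the entry with that reason,
# returning None allows it; 'incomplete' is True when only partial metadata is available.
#
#   def my_match_filter(info_dict, incomplete=False):
#       if not incomplete and (info_dict.get('duration') or 0) < 60:
#           return 'Skipping short video'
#       return None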
b6c45014
JMF
1296 @staticmethod
1297 def add_extra_info(info_dict, extra_info):
1298 '''Set the keys from extra_info in info dict if they are missing'''
1299 for key, value in extra_info.items():
1300 info_dict.setdefault(key, value)
1301
409e1828 1302 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1303 process=True, force_generic_extractor=False):
41d1cca3 1304 """
1305 Extract and return the information dictionary for the URL.
1306
1307 Arguments:
1308 url -- URL to extract
1309
1310 Keyword arguments:
1311 download -- whether to download videos during extraction
1312 ie_key -- extractor key hint
1313 extra_info -- dictionary containing the extra values to add to each result
1314 process -- whether to resolve all unresolved references (URLs, playlist items),
1315 must be True for download to work.
1316 force_generic_extractor -- force using the generic extractor
1317 """
fe7e0c98 1318
409e1828 1319 if extra_info is None:
1320 extra_info = {}
1321
61aa5ba3 1322 if not ie_key and force_generic_extractor:
d22dec74
S
1323 ie_key = 'Generic'
1324
8222d8de 1325 if ie_key:
8b7491c8 1326 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1327 else:
1328 ies = self._ies
1329
8b7491c8 1330 for ie_key, ie in ies.items():
8222d8de
JMF
1331 if not ie.suitable(url):
1332 continue
1333
1334 if not ie.working():
6febd1c1
PH
1335 self.report_warning('The program functionality for this site has been marked as broken, '
1336 'and will probably not work.')
8222d8de 1337
1151c407 1338 temp_id = ie.get_temp_id(url)
a0566bbf 1339 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1340 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1341 if self.params.get('break_on_existing', False):
1342 raise ExistingVideoReached()
a0566bbf 1343 break
8b7491c8 1344 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1345 else:
1346 self.report_error('no suitable InfoExtractor for URL %s' % url)
1347
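# Typical call pattern for extract_info above (sketch, assuming a configured YoutubeDL
# instance): extract_info(url, download=False) probes every registered extractor with
# ie.suitable(url) unless an ie_key hint narrows it to one class; the archive check on
# the temporary id lets --break-on-existing stop before any real extraction happens.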
8e5fecc8 1348 def __handle_extraction_exceptions(func):
b5ae35ee 1349 @functools.wraps(func)
a0566bbf 1350 def wrapper(self, *args, **kwargs):
1351 try:
1352 return func(self, *args, **kwargs)
773f291d
S
1353 except GeoRestrictedError as e:
1354 msg = e.msg
1355 if e.countries:
1356 msg += '\nThis video is available in %s.' % ', '.join(
1357 map(ISO3166Utils.short2full, e.countries))
1358 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1359 self.report_error(msg)
fb043a6e 1360 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1361 self.report_error(compat_str(e), e.format_traceback())
f2ebc5c7 1362 except ReExtractInfo as e:
1363 if e.expected:
1364 self.to_screen(f'{e}; Re-extracting data')
1365 else:
1366 self.to_stderr('\r')
1367 self.report_warning(f'{e}; Re-extracting data')
51d9739f 1368 return wrapper(self, *args, **kwargs)
c07a39ae 1369 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
d3e5bbf4 1370 raise
8222d8de 1371 except Exception as e:
b1940459 1372 if self.params.get('ignoreerrors'):
9b9c5355 1373 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1374 else:
1375 raise
a0566bbf 1376 return wrapper
1377
f2ebc5c7 1378 def _wait_for_video(self, ie_result):
1379 if (not self.params.get('wait_for_video')
1380 or ie_result.get('_type', 'video') != 'video'
1381 or ie_result.get('formats') or ie_result.get('url')):
1382 return
1383
1384 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1385 last_msg = ''
1386
1387 def progress(msg):
1388 nonlocal last_msg
1389 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1390 last_msg = msg
1391
1392 min_wait, max_wait = self.params.get('wait_for_video')
1393 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1394 if diff is None and ie_result.get('live_status') == 'is_upcoming':
38d79fd1 1395 diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
f2ebc5c7 1396 self.report_warning('Release time of video is not known')
1397 elif (diff or 0) <= 0:
1398 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1399 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1400 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1401
1402 wait_till = time.time() + diff
1403 try:
1404 while True:
1405 diff = wait_till - time.time()
1406 if diff <= 0:
1407 progress('')
1408 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1409 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1410 time.sleep(1)
1411 except KeyboardInterrupt:
1412 progress('')
1413 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1414 except BaseException as e:
1415 if not isinstance(e, ReExtractInfo):
1416 self.to_screen('')
1417 raise
1418
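# Note on _wait_for_video above: the 'wait_for_video' parameter is a (min_wait, max_wait)
# pair in seconds; when the release time is unknown a random delay within that range is
# chosen, otherwise the computed wait is clamped between min_wait and max_wait before
# extraction is retried via ReExtractInfo.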
a0566bbf 1419 @__handle_extraction_exceptions
58f197b7 1420 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1421 ie_result = ie.extract(url)
1422 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1423 return
1424 if isinstance(ie_result, list):
1425 # Backwards compatibility: old IE result format
1426 ie_result = {
1427 '_type': 'compat_list',
1428 'entries': ie_result,
1429 }
e37d0efb 1430 if extra_info.get('original_url'):
1431 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1432 self.add_default_extra_info(ie_result, ie, url)
1433 if process:
f2ebc5c7 1434 self._wait_for_video(ie_result)
a0566bbf 1435 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1436 else:
a0566bbf 1437 return ie_result
fe7e0c98 1438
ea38e55f 1439 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1440 if url is not None:
1441 self.add_extra_info(ie_result, {
1442 'webpage_url': url,
1443 'original_url': url,
1444 'webpage_url_basename': url_basename(url),
0bb322b9 1445 'webpage_url_domain': get_domain(url),
6033d980 1446 })
1447 if ie is not None:
1448 self.add_extra_info(ie_result, {
1449 'extractor': ie.IE_NAME,
1450 'extractor_key': ie.ie_key(),
1451 })
ea38e55f 1452
58adec46 1453 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1454 """
1455 Take the result of the ie (may be modified) and resolve all unresolved
1456 references (URLs, playlist items).
1457
1458 It will also download the videos if 'download' is True.
1459 Returns the resolved ie_result.
1460 """
58adec46 1461 if extra_info is None:
1462 extra_info = {}
e8ee972c
PH
1463 result_type = ie_result.get('_type', 'video')
1464
057a5206 1465 if result_type in ('url', 'url_transparent'):
134c6ea8 1466 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1467 if ie_result.get('original_url'):
1468 extra_info.setdefault('original_url', ie_result['original_url'])
1469
057a5206 1470 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1471 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1472 or extract_flat is True):
ecb54191 1473 info_copy = ie_result.copy()
6033d980 1474 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1475 if ie and not ie_result.get('id'):
4614bc22 1476 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1477 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1478 self.add_extra_info(info_copy, extra_info)
b5475f11 1479 info_copy, _ = self.pre_process(info_copy)
ecb54191 1480 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1481 if self.params.get('force_write_download_archive', False):
1482 self.record_download_archive(info_copy)
e8ee972c
PH
1483 return ie_result
1484
8222d8de 1485 if result_type == 'video':
b6c45014 1486 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1487 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1488 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1489 if additional_urls:
e9f4ccd1 1490 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1491 if isinstance(additional_urls, compat_str):
1492 additional_urls = [additional_urls]
1493 self.to_screen(
1494 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1495 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1496 ie_result['additional_entries'] = [
1497 self.extract_info(
b69fd25c 1498 url, download, extra_info=extra_info,
9c2b75b5 1499 force_generic_extractor=self.params.get('force_generic_extractor'))
1500 for url in additional_urls
1501 ]
1502 return ie_result
8222d8de
JMF
1503 elif result_type == 'url':
1504 # We have to add extra_info to the results because it may be
1505 # contained in a playlist
07cce701 1506 return self.extract_info(
1507 ie_result['url'], download,
1508 ie_key=ie_result.get('ie_key'),
1509 extra_info=extra_info)
7fc3fa05
PH
1510 elif result_type == 'url_transparent':
1511 # Use the information from the embedding page
1512 info = self.extract_info(
1513 ie_result['url'], ie_key=ie_result.get('ie_key'),
1514 extra_info=extra_info, download=False, process=False)
1515
1640eb09
S
1516 # extract_info may return None when ignoreerrors is enabled and
1517 # extraction failed with an error, don't crash and return early
1518 # in this case
1519 if not info:
1520 return info
1521
412c617d
PH
1522 force_properties = dict(
1523 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1524 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1525 if f in force_properties:
1526 del force_properties[f]
1527 new_result = info.copy()
1528 new_result.update(force_properties)
7fc3fa05 1529
0563f7ac
S
1530 # Extracted info may not be a video result (i.e.
1531 # info.get('_type', 'video') != 'video') but rather a 'url' or
1532 # 'url_transparent'. In such cases outer metadata (from ie_result)
1533 # should be propagated to inner one (info). For this to happen
1534 # _type of info should be overridden with url_transparent. This
067aa17e 1535 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1536 if new_result.get('_type') == 'url':
1537 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1538
1539 return self.process_ie_result(
1540 new_result, download=download, extra_info=extra_info)
40fcba5e 1541 elif result_type in ('playlist', 'multi_video'):
30a074c2 1542 # Protect from infinite recursion due to recursively nested playlists
1543 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1544 webpage_url = ie_result['webpage_url']
1545 if webpage_url in self._playlist_urls:
7e85e872 1546 self.to_screen(
30a074c2 1547 '[download] Skipping already downloaded playlist: %s'
1548 % (ie_result.get('title') or ie_result.get('id')))
1549 return
7e85e872 1550
30a074c2 1551 self._playlist_level += 1
1552 self._playlist_urls.add(webpage_url)
bc516a3f 1553 self._sanitize_thumbnails(ie_result)
30a074c2 1554 try:
1555 return self.__process_playlist(ie_result, download)
1556 finally:
1557 self._playlist_level -= 1
1558 if not self._playlist_level:
1559 self._playlist_urls.clear()
8222d8de 1560 elif result_type == 'compat_list':
c9bf4114
PH
1561 self.report_warning(
1562 'Extractor %s returned a compat_list result. '
1563 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1564
8222d8de 1565 def _fixup(r):
b868936c 1566 self.add_extra_info(r, {
1567 'extractor': ie_result['extractor'],
1568 'webpage_url': ie_result['webpage_url'],
1569 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1570 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1571 'extractor_key': ie_result['extractor_key'],
1572 })
8222d8de
JMF
1573 return r
1574 ie_result['entries'] = [
b6c45014 1575 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1576 for r in ie_result['entries']
1577 ]
1578 return ie_result
1579 else:
1580 raise Exception('Invalid result type: %s' % result_type)
1581
e92caff5 1582 def _ensure_dir_exists(self, path):
1583 return make_dir(path, self.report_error)
1584
30a074c2 1585 def __process_playlist(self, ie_result, download):
1586 # We process each entry in the playlist
1587 playlist = ie_result.get('title') or ie_result.get('id')
1588 self.to_screen('[download] Downloading playlist: %s' % playlist)
1589
498f5606 1590 if 'entries' not in ie_result:
aa9369a2 1591 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1592
1593 MissingEntry = object()
498f5606 1594 incomplete_entries = bool(ie_result.get('requested_entries'))
1595 if incomplete_entries:
bf5f605e 1596 def fill_missing_entries(entries, indices):
7c7f7161 1597 ret = [MissingEntry] * max(indices)
bf5f605e 1598 for i, entry in zip(indices, entries):
498f5606 1599 ret[i - 1] = entry
1600 return ret
1601 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1602
30a074c2 1603 playlist_results = []
1604
56a8fb4f 1605 playliststart = self.params.get('playliststart', 1)
30a074c2 1606 playlistend = self.params.get('playlistend')
1607 # For backwards compatibility, interpret -1 as whole list
1608 if playlistend == -1:
1609 playlistend = None
1610
1611 playlistitems_str = self.params.get('playlist_items')
1612 playlistitems = None
1613 if playlistitems_str is not None:
1614 def iter_playlistitems(format):
1615 for string_segment in format.split(','):
1616 if '-' in string_segment:
1617 start, end = string_segment.split('-')
1618 for item in range(int(start), int(end) + 1):
1619 yield int(item)
1620 else:
1621 yield int(string_segment)
1622 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
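# e.g. (illustrative) '1-3,7,7' yields [1, 2, 3, 7]: ranges are inclusive and
# orderedSet drops duplicates while preserving first-seen order.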
1623
1624 ie_entries = ie_result['entries']
56a8fb4f 1625 msg = (
1626 'Downloading %d videos' if not isinstance(ie_entries, list)
1627 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1628
1629 if isinstance(ie_entries, list):
1630 def get_entry(i):
1631 return ie_entries[i - 1]
1632 else:
c586f9e8 1633 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1634 ie_entries = LazyList(ie_entries)
1635
1636 def get_entry(i):
1637 return YoutubeDL.__handle_extraction_exceptions(
1638 lambda self, i: ie_entries[i - 1]
1639 )(self, i)
50fed816 1640
56a8fb4f 1641 entries = []
ff1c7fc9 1642 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1643 for i in items:
1644 if i == 0:
1645 continue
56a8fb4f 1646 if playlistitems is None and playlistend is not None and playlistend < i:
1647 break
1648 entry = None
1649 try:
50fed816 1650 entry = get_entry(i)
7c7f7161 1651 if entry is MissingEntry:
498f5606 1652 raise EntryNotInPlaylist()
56a8fb4f 1653 except (IndexError, EntryNotInPlaylist):
1654 if incomplete_entries:
aa9369a2 1655 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1656 elif not playlistitems:
1657 break
1658 entries.append(entry)
120fe513 1659 try:
1660 if entry is not None:
1661 self._match_entry(entry, incomplete=True, silent=True)
1662 except (ExistingVideoReached, RejectedVideoReached):
1663 break
56a8fb4f 1664 ie_result['entries'] = entries
30a074c2 1665
56a8fb4f 1666 # Save playlist_index before re-ordering
1667 entries = [
9e598870 1668 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1669 for i, entry in enumerate(entries, 1)
1670 if entry is not None]
1671 n_entries = len(entries)
498f5606 1672
e08a85d8 1673 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1674 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1675 ie_result['requested_entries'] = playlistitems
1676
e08a85d8 1677 _infojson_written = False
49a57e70 1678 if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
498f5606 1679 ie_copy = {
1680 'playlist': playlist,
1681 'playlist_id': ie_result.get('id'),
1682 'playlist_title': ie_result.get('title'),
1683 'playlist_uploader': ie_result.get('uploader'),
1684 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1685 'playlist_index': 0,
49a57e70 1686 'n_entries': n_entries,
498f5606 1687 }
1688 ie_copy.update(dict(ie_result))
1689
e08a85d8 1690 _infojson_written = self._write_info_json(
1691 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1692 if _infojson_written is None:
80c03fa9 1693 return
1694 if self._write_description('playlist', ie_result,
1695 self.prepare_filename(ie_copy, 'pl_description')) is None:
1696 return
681de68e 1697 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1698 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1699
1700 if self.params.get('playlistreverse', False):
1701 entries = entries[::-1]
30a074c2 1702 if self.params.get('playlistrandom', False):
1703 random.shuffle(entries)
1704
1705 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1706
56a8fb4f 1707 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1708 failures = 0
1709 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1710 for i, entry_tuple in enumerate(entries, 1):
1711 playlist_index, entry = entry_tuple
81139999 1712 if 'playlist-index' in self.params.get('compat_opts', []):
1713 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1714 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1715 # This __x_forwarded_for_ip thing is a bit ugly but requires
1716 # minimal changes
1717 if x_forwarded_for:
1718 entry['__x_forwarded_for_ip'] = x_forwarded_for
1719 extra = {
1720 'n_entries': n_entries,
f59ae581 1721 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1722 'playlist_index': playlist_index,
1723 'playlist_autonumber': i,
30a074c2 1724 'playlist': playlist,
1725 'playlist_id': ie_result.get('id'),
1726 'playlist_title': ie_result.get('title'),
1727 'playlist_uploader': ie_result.get('uploader'),
1728 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1729 'extractor': ie_result['extractor'],
1730 'webpage_url': ie_result['webpage_url'],
1731 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1732 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1733 'extractor_key': ie_result['extractor_key'],
1734 }
1735
1736 if self._match_entry(entry, incomplete=True) is not None:
1737 continue
1738
1739 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1740 if not entry_result:
1741 failures += 1
1742 if failures >= max_failures:
1743 self.report_error(
1744 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1745 break
30a074c2 1746 playlist_results.append(entry_result)
1747 ie_result['entries'] = playlist_results
e08a85d8 1748
1749 # Write the updated info to json
1750 if _infojson_written and self._write_info_json(
1751 'updated playlist', ie_result,
1752 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1753 return
30a074c2 1754 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1755 return ie_result
1756
a0566bbf 1757 @__handle_extraction_exceptions
1758 def __process_iterable_entry(self, entry, download, extra_info):
1759 return self.process_ie_result(
1760 entry, download=download, extra_info=extra_info)
1761
67134eab
JMF
1762 def _build_format_filter(self, filter_spec):
1763 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1764
1765 OPERATORS = {
1766 '<': operator.lt,
1767 '<=': operator.le,
1768 '>': operator.gt,
1769 '>=': operator.ge,
1770 '=': operator.eq,
1771 '!=': operator.ne,
1772 }
67134eab 1773 operator_rex = re.compile(r'''(?x)\s*
187986a8 1774 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1775 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1776 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1777 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1778 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1779 if m:
1780 try:
1781 comparison_value = int(m.group('value'))
1782 except ValueError:
1783 comparison_value = parse_filesize(m.group('value'))
1784 if comparison_value is None:
1785 comparison_value = parse_filesize(m.group('value') + 'B')
1786 if comparison_value is None:
1787 raise ValueError(
1788 'Invalid value %r in format specification %r' % (
67134eab 1789 m.group('value'), filter_spec))
9ddb6925
S
1790 op = OPERATORS[m.group('op')]
1791
083c9df9 1792 if not m:
9ddb6925
S
1793 STR_OPERATORS = {
1794 '=': operator.eq,
10d33b34
YCH
1795 '^=': lambda attr, value: attr.startswith(value),
1796 '$=': lambda attr, value: attr.endswith(value),
1797 '*=': lambda attr, value: value in attr,
9ddb6925 1798 }
187986a8 1799 str_operator_rex = re.compile(r'''(?x)\s*
1800 (?P<key>[a-zA-Z0-9._-]+)\s*
1801 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1802 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1803 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1804 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1805 if m:
1806 comparison_value = m.group('value')
2cc779f4
S
1807 str_op = STR_OPERATORS[m.group('op')]
1808 if m.group('negation'):
e118a879 1809 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1810 else:
1811 op = str_op
083c9df9 1812
9ddb6925 1813 if not m:
187986a8 1814 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1815
1816 def _filter(f):
1817 actual_value = f.get(m.group('key'))
1818 if actual_value is None:
1819 return m.group('none_inclusive')
1820 return op(actual_value, comparison_value)
67134eab
JMF
1821 return _filter
1822
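# Illustrative use of the predicate returned by _build_format_filter above
# (hypothetical values):
#   keep = self._build_format_filter('height>=720')
#   keep({'height': 1080}) -> True; keep({'height': 480}) -> False
#   keep({}) is falsy unless the spec has a trailing '?' (none_inclusive), e.g. 'height>=720?'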
9f1a1c36 1823 def _check_formats(self, formats):
1824 for f in formats:
1825 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1826 path = self.get_output_path('temp')
1827 if not self._ensure_dir_exists(f'{path}/'):
1828 continue
1829 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1830 temp_file.close()
1831 try:
1832 success, _ = self.dl(temp_file.name, f, test=True)
1833 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1834 success = False
1835 finally:
1836 if os.path.exists(temp_file.name):
1837 try:
1838 os.remove(temp_file.name)
1839 except OSError:
1840 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1841 if success:
1842 yield f
1843 else:
1844 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1845
0017d9ad 1846 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1847
af0f7428
S
1848 def can_merge():
1849 merger = FFmpegMergerPP(self)
1850 return merger.available and merger.can_merge()
1851
91ebc640 1852 prefer_best = (
b7b04c78 1853 not self.params.get('simulate')
91ebc640 1854 and download
1855 and (
1856 not can_merge()
19807826 1857 or info_dict.get('is_live', False)
de6000d9 1858 or self.outtmpl_dict['default'] == '-'))
53ed7066 1859 compat = (
1860 prefer_best
1861 or self.params.get('allow_multiple_audio_streams', False)
1862 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1863
1864 return (
53ed7066 1865 'best/bestvideo+bestaudio' if prefer_best
1866 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1867 else 'bestvideo+bestaudio/best')
0017d9ad 1868
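# Summary sketch of _default_format_spec above: during a real download, if merging is not
# possible (no usable ffmpeg merger, a live stream, or output going to '-') the spec
# becomes 'best/bestvideo+bestaudio'; otherwise 'bestvideo*+bestaudio/best' is used unless
# a compat option or allow_multiple_audio_streams forces 'bestvideo+bestaudio/best'.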
67134eab
JMF
1869 def build_format_selector(self, format_spec):
1870 def syntax_error(note, start):
1871 message = (
1872 'Invalid format specification: '
1873 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1874 return SyntaxError(message)
1875
1876 PICKFIRST = 'PICKFIRST'
1877 MERGE = 'MERGE'
1878 SINGLE = 'SINGLE'
0130afb7 1879 GROUP = 'GROUP'
67134eab
JMF
1880 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1881
91ebc640 1882 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1883 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1884
9f1a1c36 1885 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1886
67134eab
JMF
1887 def _parse_filter(tokens):
1888 filter_parts = []
1889 for type, string, start, _, _ in tokens:
1890 if type == tokenize.OP and string == ']':
1891 return ''.join(filter_parts)
1892 else:
1893 filter_parts.append(string)
1894
232541df 1895 def _remove_unused_ops(tokens):
17cc1534 1896 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1897 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1898 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1899 last_string, last_start, last_end, last_line = None, None, None, None
1900 for type, string, start, end, line in tokens:
1901 if type == tokenize.OP and string == '[':
1902 if last_string:
1903 yield tokenize.NAME, last_string, last_start, last_end, last_line
1904 last_string = None
1905 yield type, string, start, end, line
1906 # everything inside brackets will be handled by _parse_filter
1907 for type, string, start, end, line in tokens:
1908 yield type, string, start, end, line
1909 if type == tokenize.OP and string == ']':
1910 break
1911 elif type == tokenize.OP and string in ALLOWED_OPS:
1912 if last_string:
1913 yield tokenize.NAME, last_string, last_start, last_end, last_line
1914 last_string = None
1915 yield type, string, start, end, line
1916 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1917 if not last_string:
1918 last_string = string
1919 last_start = start
1920 last_end = end
1921 else:
1922 last_string += string
1923 if last_string:
1924 yield tokenize.NAME, last_string, last_start, last_end, last_line
1925
cf2ac6df 1926 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1927 selectors = []
1928 current_selector = None
1929 for type, string, start, _, _ in tokens:
1930 # ENCODING is only defined in python 3.x
1931 if type == getattr(tokenize, 'ENCODING', None):
1932 continue
1933 elif type in [tokenize.NAME, tokenize.NUMBER]:
1934 current_selector = FormatSelector(SINGLE, string, [])
1935 elif type == tokenize.OP:
cf2ac6df
JMF
1936 if string == ')':
1937 if not inside_group:
1938 # ')' will be handled by the parentheses group
1939 tokens.restore_last_token()
67134eab 1940 break
cf2ac6df 1941 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1942 tokens.restore_last_token()
1943 break
cf2ac6df
JMF
1944 elif inside_choice and string == ',':
1945 tokens.restore_last_token()
1946 break
1947 elif string == ',':
0a31a350
JMF
1948 if not current_selector:
1949 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1950 selectors.append(current_selector)
1951 current_selector = None
1952 elif string == '/':
d96d604e
JMF
1953 if not current_selector:
1954 raise syntax_error('"/" must follow a format selector', start)
67134eab 1955 first_choice = current_selector
cf2ac6df 1956 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1957 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1958 elif string == '[':
1959 if not current_selector:
1960 current_selector = FormatSelector(SINGLE, 'best', [])
1961 format_filter = _parse_filter(tokens)
1962 current_selector.filters.append(format_filter)
0130afb7
JMF
1963 elif string == '(':
1964 if current_selector:
1965 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1966 group = _parse_format_selection(tokens, inside_group=True)
1967 current_selector = FormatSelector(GROUP, group, [])
67134eab 1968 elif string == '+':
d03cfdce 1969 if not current_selector:
1970 raise syntax_error('Unexpected "+"', start)
1971 selector_1 = current_selector
1972 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1973 if not selector_2:
1974 raise syntax_error('Expected a selector', start)
1975 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1976 else:
1977 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1978 elif type == tokenize.ENDMARKER:
1979 break
1980 if current_selector:
1981 selectors.append(current_selector)
1982 return selectors
1983
f8d4ad9a 1984 def _merge(formats_pair):
1985 format_1, format_2 = formats_pair
1986
1987 formats_info = []
1988 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1989 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1990
1991 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1992 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1993 for (i, fmt_info) in enumerate(formats_info):
551f9388 1994 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1995 formats_info.pop(i)
1996 continue
1997 for aud_vid in ['audio', 'video']:
f8d4ad9a 1998 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1999 if get_no_more[aud_vid]:
2000 formats_info.pop(i)
f5510afe 2001 break
f8d4ad9a 2002 get_no_more[aud_vid] = True
2003
2004 if len(formats_info) == 1:
2005 return formats_info[0]
2006
2007 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2008 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2009
2010 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2011 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2012
2013 output_ext = self.params.get('merge_output_format')
2014 if not output_ext:
2015 if the_only_video:
2016 output_ext = the_only_video['ext']
2017 elif the_only_audio and not video_fmts:
2018 output_ext = the_only_audio['ext']
2019 else:
2020 output_ext = 'mkv'
2021
975a0d0d 2022 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2023
f8d4ad9a 2024 new_dict = {
2025 'requested_formats': formats_info,
975a0d0d 2026 'format': '+'.join(filtered('format')),
2027 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2028 'ext': output_ext,
975a0d0d 2029 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2030 'language': '+'.join(orderedSet(filtered('language'))) or None,
2031 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2032 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2033 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2034 }
2035
2036 if the_only_video:
2037 new_dict.update({
2038 'width': the_only_video.get('width'),
2039 'height': the_only_video.get('height'),
2040 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2041 'fps': the_only_video.get('fps'),
49a57e70 2042 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2043 'vcodec': the_only_video.get('vcodec'),
2044 'vbr': the_only_video.get('vbr'),
2045 'stretched_ratio': the_only_video.get('stretched_ratio'),
2046 })
2047
2048 if the_only_audio:
2049 new_dict.update({
2050 'acodec': the_only_audio.get('acodec'),
2051 'abr': the_only_audio.get('abr'),
975a0d0d 2052 'asr': the_only_audio.get('asr'),
f8d4ad9a 2053 })
2054
2055 return new_dict
2056
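# Illustrative _merge result (hypothetical ids): combining a video-only format '137' with
# an audio-only format '140' yields format_id '137+140', requested_formats holding both
# originals, tbr summed from the parts, and ext taken from --merge-output-format if set,
# else the single video stream's ext, else 'mkv'.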
e8e73840 2057 def _check_formats(formats):
981052c9 2058 if not check_formats:
2059 yield from formats
b5ac45b1 2060 return
9f1a1c36 2061 yield from self._check_formats(formats)
e8e73840 2062
67134eab 2063 def _build_selector_function(selector):
909d24dd 2064 if isinstance(selector, list): # ,
67134eab
JMF
2065 fs = [_build_selector_function(s) for s in selector]
2066
317f7ab6 2067 def selector_function(ctx):
67134eab 2068 for f in fs:
981052c9 2069 yield from f(ctx)
67134eab 2070 return selector_function
909d24dd 2071
2072 elif selector.type == GROUP: # ()
0130afb7 2073 selector_function = _build_selector_function(selector.selector)
909d24dd 2074
2075 elif selector.type == PICKFIRST: # /
67134eab
JMF
2076 fs = [_build_selector_function(s) for s in selector.selector]
2077
317f7ab6 2078 def selector_function(ctx):
67134eab 2079 for f in fs:
317f7ab6 2080 picked_formats = list(f(ctx))
67134eab
JMF
2081 if picked_formats:
2082 return picked_formats
2083 return []
67134eab 2084
981052c9 2085 elif selector.type == MERGE: # +
2086 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2087
2088 def selector_function(ctx):
adbc4ec4 2089 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2090 yield _merge(pair)
2091
909d24dd 2092 elif selector.type == SINGLE: # atom
598d185d 2093 format_spec = selector.selector or 'best'
909d24dd 2094
f8d4ad9a 2095 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2096 if format_spec == 'all':
2097 def selector_function(ctx):
9222c381 2098 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2099 elif format_spec == 'mergeall':
2100 def selector_function(ctx):
dd2a987d 2101 formats = list(_check_formats(ctx['formats']))
e01d6aa4 2102 if not formats:
2103 return
921b76ca 2104 merged_format = formats[-1]
2105 for f in formats[-2::-1]:
f8d4ad9a 2106 merged_format = _merge((merged_format, f))
2107 yield merged_format
909d24dd 2108
2109 else:
e8e73840 2110 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 2111 mobj = re.match(
2112 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2113 format_spec)
2114 if mobj is not None:
2115 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2116 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2117 format_type = (mobj.group('type') or [None])[0]
2118 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2119 format_modified = mobj.group('mod') is not None
909d24dd 2120
2121 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2122 _filter_f = (
eff63539 2123 (lambda f: f.get('%scodec' % format_type) != 'none')
2124 if format_type and format_modified # bv*, ba*, wv*, wa*
2125 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2126 if format_type # bv, ba, wv, wa
2127 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2128 if not format_modified # b, w
8326b00a 2129 else lambda f: True) # b*, w*
2130 filter_f = lambda f: _filter_f(f) and (
2131 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2132 else:
48ee10ee 2133 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2134 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2135 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2136 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
48ee10ee 2137 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2138 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2139 else:
b5ae35ee 2140 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2141
2142 def selector_function(ctx):
2143 formats = list(ctx['formats'])
909d24dd 2144 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 2145 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 2146 # for extractors with incomplete formats (audio only (soundcloud)
2147 # or video only (imgur)) best/worst will fall back to
2148 # best/worst {video,audio}-only format
e8e73840 2149 matches = formats
981052c9 2150 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2151 try:
e8e73840 2152 yield matches[format_idx - 1]
981052c9 2153 except IndexError:
2154 return
083c9df9 2155
67134eab 2156 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2157
317f7ab6 2158 def final_selector(ctx):
adbc4ec4 2159 ctx_copy = dict(ctx)
67134eab 2160 for _filter in filters:
317f7ab6
S
2161 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2162 return selector_function(ctx_copy)
67134eab 2163 return final_selector
083c9df9 2164
67134eab 2165 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2166 try:
232541df 2167 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2168 except tokenize.TokenError:
2169 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2170
2171 class TokenIterator(object):
2172 def __init__(self, tokens):
2173 self.tokens = tokens
2174 self.counter = 0
2175
2176 def __iter__(self):
2177 return self
2178
2179 def __next__(self):
2180 if self.counter >= len(self.tokens):
2181 raise StopIteration()
2182 value = self.tokens[self.counter]
2183 self.counter += 1
2184 return value
2185
2186 next = __next__
2187
2188 def restore_last_token(self):
2189 self.counter -= 1
2190
2191 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2192 return _build_selector_function(parsed_selector)
a9c58ad9 2193
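# Usage sketch for build_format_selector above (hypothetical formats list): the returned
# selector is a function of a context dict, e.g.
#   selector = self.build_format_selector('bestvideo*+bestaudio/best')
#   picked = list(selector({'formats': formats, 'incomplete_formats': False}))
# where each picked item is a single format dict or a merged one produced by _merge().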
e5660ee6
JMF
2194 def _calc_headers(self, info_dict):
2195 res = std_headers.copy()
2196
2197 add_headers = info_dict.get('http_headers')
2198 if add_headers:
2199 res.update(add_headers)
2200
2201 cookies = self._calc_cookies(info_dict)
2202 if cookies:
2203 res['Cookie'] = cookies
2204
0016b84e
S
2205 if 'X-Forwarded-For' not in res:
2206 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2207 if x_forwarded_for_ip:
2208 res['X-Forwarded-For'] = x_forwarded_for_ip
2209
e5660ee6
JMF
2210 return res
2211
2212 def _calc_cookies(self, info_dict):
5c2266df 2213 pr = sanitized_Request(info_dict['url'])
e5660ee6 2214 self.cookiejar.add_cookie_header(pr)
662435f7 2215 return pr.get_header('Cookie')
e5660ee6 2216
9f1a1c36 2217 def _sort_thumbnails(self, thumbnails):
2218 thumbnails.sort(key=lambda t: (
2219 t.get('preference') if t.get('preference') is not None else -1,
2220 t.get('width') if t.get('width') is not None else -1,
2221 t.get('height') if t.get('height') is not None else -1,
2222 t.get('id') if t.get('id') is not None else '',
2223 t.get('url')))
2224
b0249bca 2225 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2226 thumbnails = info_dict.get('thumbnails')
2227 if thumbnails is None:
2228 thumbnail = info_dict.get('thumbnail')
2229 if thumbnail:
2230 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2231 if not thumbnails:
2232 return
2233
2234 def check_thumbnails(thumbnails):
2235 for t in thumbnails:
2236 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2237 try:
2238 self.urlopen(HEADRequest(t['url']))
2239 except network_exceptions as err:
2240 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2241 continue
2242 yield t
2243
2244 self._sort_thumbnails(thumbnails)
2245 for i, t in enumerate(thumbnails):
2246 if t.get('id') is None:
2247 t['id'] = '%d' % i
2248 if t.get('width') and t.get('height'):
2249 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2250 t['url'] = sanitize_url(t['url'])
2251
2252 if self.params.get('check_formats') is True:
282f5709 2253 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2254 else:
2255 info_dict['thumbnails'] = thumbnails
bc516a3f 2256
dd82ffea
JMF
2257 def process_video_result(self, info_dict, download=True):
2258 assert info_dict.get('_type', 'video') == 'video'
2259
bec1fad2
PH
2260 if 'id' not in info_dict:
2261 raise ExtractorError('Missing "id" field in extractor result')
2262 if 'title' not in info_dict:
1151c407 2263 raise ExtractorError('Missing "title" field in extractor result',
2264 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2265
c9969434
S
2266 def report_force_conversion(field, field_not, conversion):
2267 self.report_warning(
2268 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2269 % (field, field_not, conversion))
2270
2271 def sanitize_string_field(info, string_field):
2272 field = info.get(string_field)
2273 if field is None or isinstance(field, compat_str):
2274 return
2275 report_force_conversion(string_field, 'a string', 'string')
2276 info[string_field] = compat_str(field)
2277
2278 def sanitize_numeric_fields(info):
2279 for numeric_field in self._NUMERIC_FIELDS:
2280 field = info.get(numeric_field)
2281 if field is None or isinstance(field, compat_numeric_types):
2282 continue
2283 report_force_conversion(numeric_field, 'numeric', 'int')
2284 info[numeric_field] = int_or_none(field)
2285
2286 sanitize_string_field(info_dict, 'id')
2287 sanitize_numeric_fields(info_dict)
be6217b2 2288
dd82ffea
JMF
2289 if 'playlist' not in info_dict:
2290 # It isn't part of a playlist
2291 info_dict['playlist'] = None
2292 info_dict['playlist_index'] = None
2293
bc516a3f 2294 self._sanitize_thumbnails(info_dict)
d5519808 2295
536a55da 2296 thumbnail = info_dict.get('thumbnail')
bc516a3f 2297 thumbnails = info_dict.get('thumbnails')
536a55da
S
2298 if thumbnail:
2299 info_dict['thumbnail'] = sanitize_url(thumbnail)
2300 elif thumbnails:
d5519808
PH
2301 info_dict['thumbnail'] = thumbnails[-1]['url']
2302
ae30b840 2303 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2304 info_dict['display_id'] = info_dict['id']
2305
239df021 2306 if info_dict.get('duration') is not None:
2307 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2308
10db0d2f 2309 for ts_key, date_key in (
2310 ('timestamp', 'upload_date'),
2311 ('release_timestamp', 'release_date'),
2312 ):
2313 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2314 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2315 # see http://bugs.python.org/issue1646728)
2316 try:
2317 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2318 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2319 except (ValueError, OverflowError, OSError):
2320 pass
9d2ecdbc 2321
ae30b840 2322 live_keys = ('is_live', 'was_live')
2323 live_status = info_dict.get('live_status')
2324 if live_status is None:
2325 for key in live_keys:
2326 if info_dict.get(key) is False:
2327 continue
2328 if info_dict.get(key):
2329 live_status = key
2330 break
2331 if all(info_dict.get(key) is False for key in live_keys):
2332 live_status = 'not_live'
2333 if live_status:
2334 info_dict['live_status'] = live_status
2335 for key in live_keys:
2336 if info_dict.get(key) is None:
2337 info_dict[key] = (live_status == key)
2338
33d2fc2f
S
2339 # Auto generate title fields corresponding to the *_number fields when missing
2340 # in order to always have clean titles. This is very common for TV series.
2341 for field in ('chapter', 'season', 'episode'):
2342 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2343 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2344
05108a49
S
2345 for cc_kind in ('subtitles', 'automatic_captions'):
2346 cc = info_dict.get(cc_kind)
2347 if cc:
2348 for _, subtitle in cc.items():
2349 for subtitle_format in subtitle:
2350 if subtitle_format.get('url'):
2351 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2352 if subtitle_format.get('ext') is None:
2353 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2354
2355 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2356 subtitles = info_dict.get('subtitles')
4bba3716 2357
360e1ca5 2358 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2359 info_dict['id'], subtitles, automatic_captions)
a504ced0 2360
dd82ffea
JMF
2361 if info_dict.get('formats') is None:
2362 # There's only one format available
2363 formats = [info_dict]
2364 else:
2365 formats = info_dict['formats']
2366
e0493e90 2367 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2368 if not self.params.get('allow_unplayable_formats'):
2369 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2370
adbc4ec4
THD
2371 if info_dict.get('is_live'):
2372 get_from_start = bool(self.params.get('live_from_start'))
2373 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2374
db95dc13 2375 if not formats:
1151c407 2376 self.raise_no_formats(info_dict)
db95dc13 2377
73af5cc8
S
2378 def is_wellformed(f):
2379 url = f.get('url')
a5ac0c47 2380 if not url:
73af5cc8
S
2381 self.report_warning(
2382 '"url" field is missing or empty - skipping format, '
2383 'there is an error in extractor')
a5ac0c47
S
2384 return False
2385 if isinstance(url, bytes):
2386 sanitize_string_field(f, 'url')
2387 return True
73af5cc8
S
2388
2389 # Filter out malformed formats for better extraction robustness
2390 formats = list(filter(is_wellformed, formats))
2391
181c7053
S
2392 formats_dict = {}
2393
dd82ffea 2394 # We check that all the formats have the format and format_id fields
db95dc13 2395 for i, format in enumerate(formats):
c9969434
S
2396 sanitize_string_field(format, 'format_id')
2397 sanitize_numeric_fields(format)
dcf77cf1 2398 format['url'] = sanitize_url(format['url'])
e74e3b63 2399 if not format.get('format_id'):
8016c922 2400 format['format_id'] = compat_str(i)
e2effb08
S
2401 else:
2402 # Sanitize format_id from characters used in format selector expression
ec85ded8 2403 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2404 format_id = format['format_id']
2405 if format_id not in formats_dict:
2406 formats_dict[format_id] = []
2407 formats_dict[format_id].append(format)
2408
2409 # Make sure all formats have unique format_id
03b4de72 2410 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2411 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2412 ambiguous_id = len(ambiguous_formats) > 1
2413 for i, format in enumerate(ambiguous_formats):
2414 if ambiguous_id:
181c7053 2415 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2416 if format.get('ext') is None:
2417 format['ext'] = determine_ext(format['url']).lower()
2418 # Ensure there is no conflict between id and ext in format selection
2419 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2420 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2421 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2422
2423 for i, format in enumerate(formats):
8c51aa65 2424 if format.get('format') is None:
6febd1c1 2425 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2426 id=format['format_id'],
2427 res=self.format_resolution(format),
b868936c 2428 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2429 )
6f0be937 2430 if format.get('protocol') is None:
b5559424 2431 format['protocol'] = determine_protocol(format)
239df021 2432 if format.get('resolution') is None:
2433 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2434 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2435 format['dynamic_range'] = 'SDR'
f2fe69c7 2436 if (info_dict.get('duration') and format.get('tbr')
2437 and not format.get('filesize') and not format.get('filesize_approx')):
2438 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2439
e5660ee6
JMF
2440 # Add HTTP headers, so that external programs can use them from the
2441 # json output
2442 full_format_info = info_dict.copy()
2443 full_format_info.update(format)
2444 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2445 # Remove private housekeeping stuff
2446 if '__x_forwarded_for_ip' in info_dict:
2447 del info_dict['__x_forwarded_for_ip']
dd82ffea 2448
4bcc7bd1 2449 # TODO Central sorting goes here
99e206d5 2450
9f1a1c36 2451 if self.params.get('check_formats') is True:
282f5709 2452 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2453
88acdbc2 2454 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2455 # only set the 'formats' field if the original info_dict lists them
2456 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2457 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2458 # which can't be exported to json
b3d9ef88 2459 info_dict['formats'] = formats
4ec82a72 2460
2461 info_dict, _ = self.pre_process(info_dict)
2462
093a1710 2463 # The pre-processors may have modified the formats
2464 formats = info_dict.get('formats', [info_dict])
2465
fa9f30b8 2466 list_only = self.params.get('simulate') is None and (
2467 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2468 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2469 if self.params.get('list_thumbnails'):
2470 self.list_thumbnails(info_dict)
b7b04c78 2471 if self.params.get('listsubtitles'):
2472 if 'automatic_captions' in info_dict:
2473 self.list_subtitles(
2474 info_dict['id'], automatic_captions, 'automatic captions')
2475 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2476 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2477 self.list_formats(info_dict)
169dbde9 2478 if list_only:
b7b04c78 2479 # Without this printing, -F --print-json will not work
169dbde9 2480 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2481 return
2482
187986a8 2483 format_selector = self.format_selector
2484 if format_selector is None:
0017d9ad 2485 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2486 self.write_debug('Default format spec: %s' % req_format)
187986a8 2487 format_selector = self.build_format_selector(req_format)
317f7ab6 2488
fa9f30b8 2489 while True:
2490 if interactive_format_selection:
2491 req_format = input(
2492 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2493 try:
2494 format_selector = self.build_format_selector(req_format)
2495 except SyntaxError as err:
2496 self.report_error(err, tb=False, is_error=False)
2497 continue
2498
2499 # While in format selection we may need to have access to the original
2500 # format set in order to calculate some metrics or do some processing.
2501 # For now we need to be able to guess whether original formats provided
2502 # by extractor are incomplete or not (i.e. whether extractor provides only
2503 # video-only or audio-only formats) for proper formats selection for
2504 # extractors with such incomplete formats (see
2505 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2506 # Since formats may be filtered during format selection and may not match
2507 # the original formats the results may be incorrect. Thus original formats
2508 # or pre-calculated metrics should be passed to format selection routines
2509 # as well.
2510 # We will pass a context object containing all necessary additional data
2511 # instead of just formats.
2512 # This fixes incorrect format selection issue (see
2513 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2514 incomplete_formats = (
2515 # All formats are video-only or
2516 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2517 # all formats are audio-only
2518 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2519
2520 ctx = {
2521 'formats': formats,
2522 'incomplete_formats': incomplete_formats,
2523 }
2524
2525 formats_to_download = list(format_selector(ctx))
2526 if interactive_format_selection and not formats_to_download:
2527 self.report_error('Requested format is not available', tb=False, is_error=False)
2528 continue
2529 break
317f7ab6 2530
dd82ffea 2531 if not formats_to_download:
b7da73eb 2532 if not self.params.get('ignore_no_formats_error'):
1151c407 2533 raise ExtractorError('Requested format is not available', expected=True,
2534 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2535 else:
2536 self.report_warning('Requested format is not available')
4513a41a
A
2537 # Process what we can, even without any available formats.
2538 self.process_info(dict(info_dict))
b7da73eb 2539 elif download:
2540 self.to_screen(
07cce701 2541 '[info] %s: Downloading %d format(s): %s' % (
2542 info_dict['id'], len(formats_to_download),
2543 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2544 for fmt in formats_to_download:
dd82ffea 2545 new_info = dict(info_dict)
4ec82a72 2546 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2547 new_info['__original_infodict'] = info_dict
b7da73eb 2548 new_info.update(fmt)
dd82ffea 2549 self.process_info(new_info)
49a57e70 2550 # We update the info dict with the selected best quality format (backwards compatibility)
b7da73eb 2551 if formats_to_download:
2552 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2553 return info_dict
2554
98c70d6f 2555 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2556 """Select the requested subtitles and their format"""
98c70d6f
JMF
2557 available_subs = {}
2558 if normal_subtitles and self.params.get('writesubtitles'):
2559 available_subs.update(normal_subtitles)
2560 if automatic_captions and self.params.get('writeautomaticsub'):
2561 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2562 if lang not in available_subs:
2563 available_subs[lang] = cap_info
2564
4d171848
JMF
2565 if (not self.params.get('writesubtitles') and not
2566 self.params.get('writeautomaticsub') or not
2567 available_subs):
2568 return None
a504ced0 2569
c32b0aab 2570 all_sub_langs = available_subs.keys()
a504ced0 2571 if self.params.get('allsubtitles', False):
c32b0aab 2572 requested_langs = all_sub_langs
2573 elif self.params.get('subtitleslangs', False):
77c4a9ef 2574 # A list is used so that the order of languages will be the same as
2575 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2576 requested_langs = []
2577 for lang_re in self.params.get('subtitleslangs'):
2578 if lang_re == 'all':
2579 requested_langs.extend(all_sub_langs)
c32b0aab 2580 continue
77c4a9ef 2581 discard = lang_re[0] == '-'
c32b0aab 2582 if discard:
77c4a9ef 2583 lang_re = lang_re[1:]
2584 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2585 if discard:
2586 for lang in current_langs:
77c4a9ef 2587 while lang in requested_langs:
2588 requested_langs.remove(lang)
c32b0aab 2589 else:
77c4a9ef 2590 requested_langs.extend(current_langs)
2591 requested_langs = orderedSet(requested_langs)
c32b0aab 2592 elif 'en' in available_subs:
2593 requested_langs = ['en']
a504ced0 2594 else:
c32b0aab 2595 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2596 if requested_langs:
2597 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
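        # A rough example of how the patterns above resolve (hypothetical data):
        #   available_subs = {'en-US': [...], 'ja': [...], 'live_chat': [...]}
        #   subtitleslangs = ['en.*', '-live_chat']  ->  requested_langs == ['en-US']
        #   subtitleslangs = ['all', '-live_chat']   ->  every language except live_chat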
2598
2599 formats_query = self.params.get('subtitlesformat', 'best')
2600 formats_preference = formats_query.split('/') if formats_query else []
2601 subs = {}
2602 for lang in requested_langs:
2603 formats = available_subs.get(lang)
2604 if formats is None:
2605 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2606 continue
2607 for ext in formats_preference:
2608 if ext == 'best':
2609 f = formats[-1]
2610 break
2611 matches = list(filter(lambda f: f['ext'] == ext, formats))
2612 if matches:
2613 f = matches[-1]
2614 break
2615 else:
2616 f = formats[-1]
2617 self.report_warning(
2618 'No subtitle format found matching "%s" for language %s, '
2619 'using %s' % (formats_query, lang, f['ext']))
2620 subs[lang] = f
2621 return subs
2622
d06daf23 2623 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2624 def print_mandatory(field, actual_field=None):
2625 if actual_field is None:
2626 actual_field = field
d06daf23 2627 if (self.params.get('force%s' % field, False)
53c18592 2628 and (not incomplete or info_dict.get(actual_field) is not None)):
2629 self.to_stdout(info_dict[actual_field])
2630
2631 def print_optional(field):
2632 if (self.params.get('force%s' % field, False)
2633 and info_dict.get(field) is not None):
2634 self.to_stdout(info_dict[field])
2635
53c18592 2636 info_dict = info_dict.copy()
2637 if filename is not None:
2638 info_dict['filename'] = filename
2639 if info_dict.get('requested_formats') is not None:
2640 # For RTMP URLs, also include the playpath
2641 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2642 elif 'url' in info_dict:
2643 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2644
2b8a2973 2645 if self.params.get('forceprint') or self.params.get('forcejson'):
2646 self.post_extract(info_dict)
53c18592 2647 for tmpl in self.params.get('forceprint', []):
b5ae35ee 2648 mobj = re.match(r'\w+(=?)$', tmpl)
2649 if mobj and mobj.group(1):
2650 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2651 elif mobj:
2652 tmpl = '%({})s'.format(tmpl)
2653 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
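        # A rough sketch of the rewriting above (hypothetical --print arguments):
        #   'title'   ->  '%(title)s'            (prints just the value)
        #   'title='  ->  'title = %(title)s'    (prints "title = <value>")
        # Anything that is not a bare field name is passed to evaluate_outtmpl unchanged.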
53c18592 2654
2655 print_mandatory('title')
2656 print_mandatory('id')
53c18592 2657 print_mandatory('url', 'urls')
2658 print_optional('thumbnail')
2659 print_optional('description')
53c18592 2660 print_optional('filename')
b868936c 2661 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2662 self.to_stdout(formatSeconds(info_dict['duration']))
2663 print_mandatory('format')
53c18592 2664
2b8a2973 2665 if self.params.get('forcejson'):
6e84b215 2666 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2667
e8e73840 2668 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2669 if not info.get('url'):
1151c407 2670 self.raise_no_formats(info, True)
e8e73840 2671
2672 if test:
2673 verbose = self.params.get('verbose')
2674 params = {
2675 'test': True,
a169858f 2676 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2677 'verbose': verbose,
2678 'noprogress': not verbose,
2679 'nopart': True,
2680 'skip_unavailable_fragments': False,
2681 'keep_fragments': False,
2682 'overwrites': True,
2683 '_no_ytdl_file': True,
2684 }
2685 else:
2686 params = self.params
96fccc10 2687 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2688 if not test:
2689 for ph in self._progress_hooks:
2690 fd.add_progress_hook(ph)
18e674b4 2691 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2692 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2693
 2694         # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2695 # But it may contain objects that are not deep-copyable
2696 new_info = self._copy_infodict(info)
e8e73840 2697 if new_info.get('http_headers') is None:
2698 new_info['http_headers'] = self._calc_headers(new_info)
2699 return fd.download(name, new_info, subtitle)
2700
2701 def process_info(self, info_dict):
2702 """Process a single resolved IE result."""
2703
2704 assert info_dict.get('_type', 'video') == 'video'
2705
2706 max_downloads = self.params.get('max_downloads')
2707 if max_downloads is not None:
2708 if self._num_downloads >= int(max_downloads):
2709 raise MaxDownloadsReached()
8222d8de 2710
adbc4ec4 2711 if info_dict.get('is_live') and not self.params.get('live_from_start'):
39ca3b5c 2712 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2713
d06daf23 2714 # TODO: backward compatibility, to be removed
8222d8de 2715 info_dict['fulltitle'] = info_dict['title']
8222d8de 2716
4513a41a 2717 if 'format' not in info_dict and 'ext' in info_dict:
2718 info_dict['format'] = info_dict['ext']
2719
c77495e3 2720 if self._match_entry(info_dict) is not None:
2721 return
2722
277d6ff5 2723 self.post_extract(info_dict)
fd288278 2724 self._num_downloads += 1
8222d8de 2725
dcf64d43 2726 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2727 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2728 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2729 files_to_move = {}
2730
2731 # Forced printings
4513a41a 2732 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2733
b7b04c78 2734 if self.params.get('simulate'):
2d30509f 2735 if self.params.get('force_write_download_archive', False):
2736 self.record_download_archive(info_dict)
2d30509f 2737 # Do nothing else if in simulate mode
2738 return
2739
de6000d9 2740 if full_filename is None:
8222d8de 2741 return
e92caff5 2742 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2743 return
e92caff5 2744 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2745 return
2746
80c03fa9 2747 if self._write_description('video', info_dict,
2748 self.prepare_filename(info_dict, 'description')) is None:
2749 return
2750
2751 sub_files = self._write_subtitles(info_dict, temp_filename)
2752 if sub_files is None:
2753 return
2754 files_to_move.update(dict(sub_files))
2755
2756 thumb_files = self._write_thumbnails(
2757 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2758 if thumb_files is None:
2759 return
2760 files_to_move.update(dict(thumb_files))
8222d8de 2761
80c03fa9 2762 infofn = self.prepare_filename(info_dict, 'infojson')
2763 _infojson_written = self._write_info_json('video', info_dict, infofn)
2764 if _infojson_written:
dac5df5a 2765 info_dict['infojson_filename'] = infofn
e75bb0d6 2766 # For backward compatibility, even though it was a private field
80c03fa9 2767 info_dict['__infojson_filename'] = infofn
2768 elif _infojson_written is None:
2769 return
2770
2771 # Note: Annotations are deprecated
2772 annofn = None
1fb07d10 2773 if self.params.get('writeannotations', False):
de6000d9 2774 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2775 if annofn:
e92caff5 2776 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2777 return
0c3d0f51 2778 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2779 self.to_screen('[info] Video annotations are already present')
2780 elif not info_dict.get('annotations'):
2781 self.report_warning('There are no annotations to write.')
2782 else:
2783 try:
6febd1c1 2784 self.to_screen('[info] Writing video annotations to: ' + annofn)
2785 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2786 annofile.write(info_dict['annotations'])
2787 except (KeyError, TypeError):
6febd1c1 2788 self.report_warning('There are no annotations to write.')
7b6fefc9 2789 except (OSError, IOError):
6febd1c1 2790 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2791 return
1fb07d10 2792
732044af 2793 # Write internet shortcut files
08438d2c 2794 def _write_link_file(link_type):
732044af 2795 if 'webpage_url' not in info_dict:
2796 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
08438d2c 2797 return False
2798 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2799 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2800 return False
10e3742e 2801 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2802 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2803 return True
2804 try:
2805 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2806 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2807 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2808 template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2809 if link_type == 'desktop':
2810 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2811 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2812 except (OSError, IOError):
2813 self.report_error(f'Cannot write internet shortcut {linkfn}')
2814 return False
732044af 2815 return True
2816
08438d2c 2817 write_links = {
2818 'url': self.params.get('writeurllink'),
2819 'webloc': self.params.get('writewebloclink'),
2820 'desktop': self.params.get('writedesktoplink'),
2821 }
2822 if self.params.get('writelink'):
2823 link_type = ('webloc' if sys.platform == 'darwin'
2824 else 'desktop' if sys.platform.startswith('linux')
2825 else 'url')
2826 write_links[link_type] = True
2827
2828 if any(should_write and not _write_link_file(link_type)
2829 for link_type, should_write in write_links.items()):
2830 return
732044af 2831
56d868db 2832 try:
2833 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2834 except PostProcessingError as err:
2835 self.report_error('Preprocessing: %s' % str(err))
2836 return
2837
732044af 2838 must_record_download_archive = False
56d868db 2839 if self.params.get('skip_download', False):
2840 info_dict['filepath'] = temp_filename
2841 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2842 info_dict['__files_to_move'] = files_to_move
2843 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2844 else:
2845 # Download
b868936c 2846 info_dict.setdefault('__postprocessors', [])
4340deca 2847 try:
0202b52a 2848
6b591b29 2849 def existing_file(*filepaths):
2850 ext = info_dict.get('ext')
2851 final_ext = self.params.get('final_ext', ext)
2852 existing_files = []
2853 for file in orderedSet(filepaths):
2854 if final_ext != ext:
2855 converted = replace_extension(file, final_ext, ext)
2856 if os.path.exists(encodeFilename(converted)):
2857 existing_files.append(converted)
2858 if os.path.exists(encodeFilename(file)):
2859 existing_files.append(file)
2860
2861 if not existing_files or self.params.get('overwrites', False):
2862 for file in orderedSet(existing_files):
2863 self.report_file_delete(file)
2864 os.remove(encodeFilename(file))
2865 return None
2866
6b591b29 2867 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2868 return existing_files[0]
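                    # Illustrative behaviour (hypothetical filenames): with ext 'webm' and
                    # final_ext 'mp4', existing_file('video.webm') returns 'video.mp4' when a
                    # previously converted file is already on disk and overwriting is disabled,
                    # also updating info_dict['ext'] so later steps pick up the converted file.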
0202b52a 2869
2870 success = True
4340deca 2871 if info_dict.get('requested_formats') is not None:
2872
2873 def compatible_formats(formats):
d03cfdce 2874 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2875 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2876 audio_formats = [format for format in formats if format.get('acodec') != 'none']
 2877                         if len(video_formats) > 1 or len(audio_formats) > 1:
2878 return False
2879
81cd954a 2880 # Check extension
d03cfdce 2881 exts = set(format.get('ext') for format in formats)
2882 COMPATIBLE_EXTS = (
2883 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2884 set(('webm',)),
2885 )
2886 for ext_sets in COMPATIBLE_EXTS:
2887 if ext_sets.issuperset(exts):
2888 return True
2889 # TODO: Check acodec/vcodec
2890 return False
2891
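                        # Roughly, for two requested formats (hypothetical extensions):
                        #   exts == {'mp4', 'm4a'}   ->  compatible, ext is left as-is
                        #   exts == {'webm', 'm4a'}  ->  not compatible, falls back to 'mkv' below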
2892 requested_formats = info_dict['requested_formats']
0202b52a 2893 old_ext = info_dict['ext']
4e3b637d 2894 if self.params.get('merge_output_format') is None:
2895 if not compatible_formats(requested_formats):
2896 info_dict['ext'] = 'mkv'
2897 self.report_warning(
2898 'Requested formats are incompatible for merge and will be merged into mkv')
2899 if (info_dict['ext'] == 'webm'
2900 and info_dict.get('thumbnails')
2901 # check with type instead of pp_key, __name__, or isinstance
 2902                                 # since we don't want any custom PPs to trigger this
2903 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2904 info_dict['ext'] = 'mkv'
2905 self.report_warning(
2906 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 2907 new_ext = info_dict['ext']
0202b52a 2908
124bc071 2909 def correct_ext(filename, ext=new_ext):
96fccc10 2910 if filename == '-':
2911 return filename
0202b52a 2912 filename_real_ext = os.path.splitext(filename)[1][1:]
2913 filename_wo_ext = (
2914 os.path.splitext(filename)[0]
124bc071 2915 if filename_real_ext in (old_ext, new_ext)
0202b52a 2916 else filename)
124bc071 2917 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2918
38c6902b 2919 # Ensure filename always has a correct extension for successful merge
0202b52a 2920 full_filename = correct_ext(full_filename)
2921 temp_filename = correct_ext(temp_filename)
2922 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2923 info_dict['__real_download'] = False
18e674b4 2924
2925 downloaded = []
2926 merger = FFmpegMergerPP(self)
2927
2928 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 2929 if dl_filename is not None:
6c7274ec 2930 self.report_file_already_downloaded(dl_filename)
2931 elif fd:
2932 for f in requested_formats if fd != FFmpegFD else []:
2933 f['filepath'] = fname = prepend_extension(
2934 correct_ext(temp_filename, info_dict['ext']),
2935 'f%s' % f['format_id'], info_dict['ext'])
2936 downloaded.append(fname)
dbf5416a 2937 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2938 success, real_download = self.dl(temp_filename, info_dict)
2939 info_dict['__real_download'] = real_download
18e674b4 2940 else:
18e674b4 2941 if self.params.get('allow_unplayable_formats'):
2942 self.report_warning(
2943 'You have requested merging of multiple formats '
2944 'while also allowing unplayable formats to be downloaded. '
2945 'The formats won\'t be merged to prevent data corruption.')
2946 elif not merger.available:
2947 self.report_warning(
2948 'You have requested merging of multiple formats but ffmpeg is not installed. '
2949 'The formats won\'t be merged.')
2950
96fccc10 2951 if temp_filename == '-':
adbc4ec4 2952 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 2953 else 'but the formats are incompatible for simultaneous download' if merger.available
2954 else 'but ffmpeg is not installed')
2955 self.report_warning(
2956 f'You have requested downloading multiple formats to stdout {reason}. '
2957 'The formats will be streamed one after the other')
2958 fname = temp_filename
dbf5416a 2959 for f in requested_formats:
2960 new_info = dict(info_dict)
2961 del new_info['requested_formats']
2962 new_info.update(f)
96fccc10 2963 if temp_filename != '-':
124bc071 2964 fname = prepend_extension(
2965 correct_ext(temp_filename, new_info['ext']),
2966 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2967 if not self._ensure_dir_exists(fname):
2968 return
a21e0ab1 2969 f['filepath'] = fname
96fccc10 2970 downloaded.append(fname)
dbf5416a 2971 partial_success, real_download = self.dl(fname, new_info)
2972 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2973 success = success and partial_success
2974
2975 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
2976 info_dict['__postprocessors'].append(merger)
2977 info_dict['__files_to_merge'] = downloaded
 2978                         # Even if there were no new downloads, the files are only being merged now
2979 info_dict['__real_download'] = True
2980 else:
2981 for file in downloaded:
2982 files_to_move[file] = None
2983 else:
2984 # Just a single file
0202b52a 2985 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2986 if dl_filename is None or dl_filename == temp_filename:
2987 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2988 # So we should try to resume the download
e8e73840 2989 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2990 info_dict['__real_download'] = real_download
6c7274ec 2991 else:
2992 self.report_file_already_downloaded(dl_filename)
0202b52a 2993
0202b52a 2994 dl_filename = dl_filename or temp_filename
c571435f 2995 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2996
3158150c 2997 except network_exceptions as err:
7960b056 2998 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2999 return
3000 except (OSError, IOError) as err:
3001 raise UnavailableVideoError(err)
3002 except (ContentTooShortError, ) as err:
3003 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3004 return
8222d8de 3005
de6000d9 3006 if success and full_filename != '-':
f17f8651 3007
fd7cfb64 3008 def fixup():
3009 do_fixup = True
3010 fixup_policy = self.params.get('fixup')
3011 vid = info_dict['id']
3012
3013 if fixup_policy in ('ignore', 'never'):
3014 return
3015 elif fixup_policy == 'warn':
3016 do_fixup = False
f89b3e2d 3017 elif fixup_policy != 'force':
3018 assert fixup_policy in ('detect_or_warn', None)
3019 if not info_dict.get('__real_download'):
3020 do_fixup = False
fd7cfb64 3021
3022 def ffmpeg_fixup(cndn, msg, cls):
3023 if not cndn:
3024 return
3025 if not do_fixup:
3026 self.report_warning(f'{vid}: {msg}')
3027 return
3028 pp = cls(self)
3029 if pp.available:
3030 info_dict['__postprocessors'].append(pp)
3031 else:
3032 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3033
3034 stretched_ratio = info_dict.get('stretched_ratio')
3035 ffmpeg_fixup(
3036 stretched_ratio not in (1, None),
3037 f'Non-uniform pixel ratio {stretched_ratio}',
3038 FFmpegFixupStretchedPP)
3039
3040 ffmpeg_fixup(
3041 (info_dict.get('requested_formats') is None
3042 and info_dict.get('container') == 'm4a_dash'
3043 and info_dict.get('ext') == 'm4a'),
3044 'writing DASH m4a. Only some players support this container',
3045 FFmpegFixupM4aPP)
3046
993191c0 3047 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3048 downloader = downloader.__name__ if downloader else None
3049
3050 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3051 ffmpeg_fixup(downloader == 'HlsFD',
3052 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3053 FFmpegFixupM3u8PP)
3054 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3055 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3056
e04b003e 3057 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3058 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3059
3060 fixup()
8222d8de 3061 try:
23c1a667 3062 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 3063 except PostProcessingError as err:
3064 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3065 return
3066 try:
3067 for ph in self._post_hooks:
23c1a667 3068 ph(info_dict['filepath'])
3069 except Exception as err:
3070 self.report_error('post hooks: %s' % str(err))
3071 return
2d30509f 3072 must_record_download_archive = True
3073
3074 if must_record_download_archive or self.params.get('force_write_download_archive', False):
3075 self.record_download_archive(info_dict)
c3e6ffba 3076 max_downloads = self.params.get('max_downloads')
3077 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3078 raise MaxDownloadsReached()
8222d8de 3079
aa9369a2 3080 def __download_wrapper(self, func):
3081 @functools.wraps(func)
3082 def wrapper(*args, **kwargs):
3083 try:
3084 res = func(*args, **kwargs)
3085 except UnavailableVideoError as e:
3086 self.report_error(e)
b222c271 3087 except MaxDownloadsReached as e:
aa9369a2 3088 self.to_screen(f'[info] {e}')
3089 raise
b222c271 3090 except DownloadCancelled as e:
3091 self.to_screen(f'[info] {e}')
3092 if not self.params.get('break_per_url'):
3093 raise
aa9369a2 3094 else:
3095 if self.params.get('dump_single_json', False):
3096 self.post_extract(res)
3097 self.to_stdout(json.dumps(self.sanitize_info(res)))
3098 return wrapper
3099
3100 def download(self, url_list):
3101 """Download a given list of URLs."""
aa9369a2 3102 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3103 outtmpl = self.outtmpl_dict['default']
3104 if (len(url_list) > 1
3105 and outtmpl != '-'
3106 and '%' not in outtmpl
3107 and self.params.get('max_downloads') != 1):
acd69589 3108 raise SameFileError(outtmpl)
3109
3110 for url in url_list:
aa9369a2 3111 self.__download_wrapper(self.extract_info)(
3112 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3113
3114 return self._download_retcode
3115
1dcc4c0c 3116 def download_with_info_file(self, info_filename):
3117 with contextlib.closing(fileinput.FileInput(
3118 [info_filename], mode='r',
3119 openhook=fileinput.hook_encoded('utf-8'))) as f:
 3120             # FileInput doesn't have a read method, so we can't call json.load
8012d892 3121 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3122 try:
aa9369a2 3123 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3124 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3125 if not isinstance(e, EntryNotInPlaylist):
3126 self.to_stderr('\r')
3127 webpage_url = info.get('webpage_url')
3128 if webpage_url is not None:
aa9369a2 3129 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3130 return self.download([webpage_url])
3131 else:
3132 raise
3133 return self._download_retcode
1dcc4c0c 3134
cb202fd2 3135 @staticmethod
8012d892 3136 def sanitize_info(info_dict, remove_private_keys=False):
3137 ''' Sanitize the infodict for converting to json '''
3ad56b42 3138 if info_dict is None:
3139 return info_dict
6e84b215 3140 info_dict.setdefault('epoch', int(time.time()))
3141 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
a49891c7 3142 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
8012d892 3143 if remove_private_keys:
6e84b215 3144 remove_keys |= {
dac5df5a 3145 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3146 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3147 }
ae8f99e6 3148 empty_values = (None, {}, [], set(), tuple())
3149 reject = lambda k, v: k not in keep_keys and (
3150 k.startswith('_') or k in remove_keys or v in empty_values)
3151 else:
ae8f99e6 3152 reject = lambda k, v: k in remove_keys
3153
3154 def filter_fn(obj):
3155 if isinstance(obj, dict):
3156 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3157 elif isinstance(obj, (list, tuple, set, LazyList)):
3158 return list(map(filter_fn, obj))
3159 elif obj is None or isinstance(obj, (str, int, float, bool)):
3160 return obj
3161 else:
3162 return repr(obj)
3163
5226731e 3164 return filter_fn(info_dict)
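    # A rough usage sketch (assuming an already-extracted info dict named `info`):
    #   clean = YoutubeDL.sanitize_info(info, remove_private_keys=True)
    #   json.dumps(clean)  # underscore-prefixed keys and empty values have been dropped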
cb202fd2 3165
8012d892 3166 @staticmethod
3167 def filter_requested_info(info_dict, actually_filter=True):
3168 ''' Alias of sanitize_info for backward compatibility '''
3169 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3170
dcf64d43 3171 def run_pp(self, pp, infodict):
5bfa4862 3172 files_to_delete = []
dcf64d43 3173 if '__files_to_move' not in infodict:
3174 infodict['__files_to_move'] = {}
b1940459 3175 try:
3176 files_to_delete, infodict = pp.run(infodict)
3177 except PostProcessingError as e:
3178 # Must be True and not 'only_download'
3179 if self.params.get('ignoreerrors') is True:
3180 self.report_error(e)
3181 return infodict
3182 raise
3183
5bfa4862 3184 if not files_to_delete:
dcf64d43 3185 return infodict
5bfa4862 3186 if self.params.get('keepvideo', False):
3187 for f in files_to_delete:
dcf64d43 3188 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3189 else:
3190 for old_filename in set(files_to_delete):
3191 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3192 try:
3193 os.remove(encodeFilename(old_filename))
3194 except (IOError, OSError):
3195 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3196 if old_filename in infodict['__files_to_move']:
3197 del infodict['__files_to_move'][old_filename]
3198 return infodict
5bfa4862 3199
277d6ff5 3200 @staticmethod
3201 def post_extract(info_dict):
3202 def actual_post_extract(info_dict):
3203 if info_dict.get('_type') in ('playlist', 'multi_video'):
3204 for video_dict in info_dict.get('entries', {}):
b050d210 3205 actual_post_extract(video_dict or {})
277d6ff5 3206 return
3207
07cce701 3208 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 3209 extra = post_extractor().items()
3210 info_dict.update(extra)
07cce701 3211 info_dict.pop('__post_extractor', None)
277d6ff5 3212
4ec82a72 3213 original_infodict = info_dict.get('__original_infodict') or {}
3214 original_infodict.update(extra)
3215 original_infodict.pop('__post_extractor', None)
3216
b050d210 3217 actual_post_extract(info_dict or {})
277d6ff5 3218
56d868db 3219 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3220 info = dict(ie_info)
56d868db 3221 info['__files_to_move'] = files_to_move or {}
3222 for pp in self._pps[key]:
dcf64d43 3223 info = self.run_pp(pp, info)
56d868db 3224 return info, info.pop('__files_to_move', None)
5bfa4862 3225
dcf64d43 3226 def post_process(self, filename, ie_info, files_to_move=None):
3227 """Run all the postprocessors on the given file."""
3228 info = dict(ie_info)
3229 info['filepath'] = filename
dcf64d43 3230 info['__files_to_move'] = files_to_move or {}
0202b52a 3231
56d868db 3232 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3233 info = self.run_pp(pp, info)
3234 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3235 del info['__files_to_move']
56d868db 3236 for pp in self._pps['after_move']:
dcf64d43 3237 info = self.run_pp(pp, info)
23c1a667 3238 return info
c1c9a79c 3239
5db07df6 3240 def _make_archive_id(self, info_dict):
3241 video_id = info_dict.get('id')
3242 if not video_id:
3243 return
3244 # Future-proof against any change in case
3245 # and backwards compatibility with prior versions
e9fef7ee 3246 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3247 if extractor is None:
3248 url = str_or_none(info_dict.get('url'))
3249 if not url:
3250 return
e9fef7ee 3251 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3252 for ie_key, ie in self._ies.items():
1211bb6d 3253 if ie.suitable(url):
8b7491c8 3254 extractor = ie_key
3255 break
3256 else:
3257 return
d0757229 3258 return '%s %s' % (extractor.lower(), video_id)
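        # Illustrative result (hypothetical entry): an info dict with extractor_key
        # 'Youtube' and id 'dQw4w9WgXcQ' produces the archive line 'youtube dQw4w9WgXcQ'.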
3259
3260 def in_download_archive(self, info_dict):
3261 fn = self.params.get('download_archive')
3262 if fn is None:
3263 return False
3264
3265 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3266 if not vid_id:
7012b23c 3267 return False # Incomplete video information
5db07df6 3268
a45e8619 3269 return vid_id in self.archive
3270
3271 def record_download_archive(self, info_dict):
3272 fn = self.params.get('download_archive')
3273 if fn is None:
3274 return
3275 vid_id = self._make_archive_id(info_dict)
3276 assert vid_id
c1c9a79c 3277 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3278 archive_file.write(vid_id + '\n')
a45e8619 3279 self.archive.add(vid_id)
dd82ffea 3280
8c51aa65 3281 @staticmethod
8abeeb94 3282 def format_resolution(format, default='unknown'):
9359f3d4 3283 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3284 return 'audio only'
3285 if format.get('resolution') is not None:
3286 return format['resolution']
35615307 3287 if format.get('width') and format.get('height'):
ff51ed58 3288 return '%dx%d' % (format['width'], format['height'])
35615307 3289 elif format.get('height'):
ff51ed58 3290 return '%sp' % format['height']
35615307 3291 elif format.get('width'):
ff51ed58 3292 return '%dx?' % format['width']
3293 return default
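    # Rough examples (hypothetical format dicts):
    #   {'width': 1920, 'height': 1080}       ->  '1920x1080'
    #   {'height': 720}                       ->  '720p'
    #   {'vcodec': 'none', 'acodec': 'opus'}  ->  'audio only'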
8c51aa65 3294
3295 def _format_note(self, fdict):
3296 res = ''
3297 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3298 res += '(unsupported)'
3299 if fdict.get('language'):
3300 if res:
3301 res += ' '
f304da8a 3302 res += '[%s]' % fdict['language']
c57f7757 3303 if fdict.get('format_note') is not None:
f304da8a 3304 if res:
3305 res += ' '
3306 res += fdict['format_note']
c57f7757 3307 if fdict.get('tbr') is not None:
f304da8a 3308 if res:
3309 res += ', '
3310 res += '%4dk' % fdict['tbr']
3311 if fdict.get('container') is not None:
3312 if res:
3313 res += ', '
3314 res += '%s container' % fdict['container']
3315 if (fdict.get('vcodec') is not None
3316 and fdict.get('vcodec') != 'none'):
3317 if res:
3318 res += ', '
3319 res += fdict['vcodec']
91c7271a 3320 if fdict.get('vbr') is not None:
3321 res += '@'
3322 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3323 res += 'video@'
3324 if fdict.get('vbr') is not None:
3325 res += '%4dk' % fdict['vbr']
fbb21cf5 3326 if fdict.get('fps') is not None:
3327 if res:
3328 res += ', '
3329 res += '%sfps' % fdict['fps']
3330 if fdict.get('acodec') is not None:
3331 if res:
3332 res += ', '
3333 if fdict['acodec'] == 'none':
3334 res += 'video only'
3335 else:
3336 res += '%-5s' % fdict['acodec']
3337 elif fdict.get('abr') is not None:
3338 if res:
3339 res += ', '
3340 res += 'audio'
3341 if fdict.get('abr') is not None:
3342 res += '@%3dk' % fdict['abr']
3343 if fdict.get('asr') is not None:
3344 res += ' (%5dHz)' % fdict['asr']
3345 if fdict.get('filesize') is not None:
3346 if res:
3347 res += ', '
3348 res += format_bytes(fdict['filesize'])
3349 elif fdict.get('filesize_approx') is not None:
3350 if res:
3351 res += ', '
3352 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3353 return res
91c7271a 3354
ec11a9f4 3355 def _list_format_headers(self, *headers):
3356 if self.params.get('listformats_table', True) is not False:
3357 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3358 return headers
3359
c57f7757 3360 def list_formats(self, info_dict):
b69fd25c 3361 if not info_dict.get('formats') and not info_dict.get('url'):
3362 self.to_screen('%s has no formats' % info_dict['id'])
3363 return
3364 self.to_screen('[info] Available formats for %s:' % info_dict['id'])
3365
94badb25 3366 formats = info_dict.get('formats', [info_dict])
ec11a9f4 3367 new_format = self.params.get('listformats_table', True) is not False
76d321f6 3368 if new_format:
ec11a9f4 3369 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
76d321f6 3370 table = [
3371 [
ec11a9f4 3372 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
76d321f6 3373 format_field(f, 'ext'),
ff51ed58 3374 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
c5e3f849 3375 format_field(f, 'fps', '\t%d'),
176f1866 3376 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
ec11a9f4 3377 delim,
c5e3f849 3378 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3379 format_field(f, 'tbr', '\t%dk'),
b69fd25c 3380 shorten_protocol_name(f.get('protocol', '')),
ec11a9f4 3381 delim,
ff51ed58 3382 format_field(f, 'vcodec', default='unknown').replace(
3383 'none',
3384 'images' if f.get('acodec') == 'none'
3385 else self._format_screen('audio only', self.Styles.SUPPRESS)),
c5e3f849 3386 format_field(f, 'vbr', '\t%dk'),
ff51ed58 3387 format_field(f, 'acodec', default='unknown').replace(
3388 'none',
3389 '' if f.get('vcodec') == 'none'
3390 else self._format_screen('video only', self.Styles.SUPPRESS)),
c5e3f849 3391 format_field(f, 'abr', '\t%dk'),
3392 format_field(f, 'asr', '\t%dHz'),
34921b43 3393 join_nonempty(
3394 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3f698246 3395 format_field(f, 'language', '[%s]'),
c5e3f849 3396 join_nonempty(
3397 format_field(f, 'format_note'),
3398 format_field(f, 'container', ignore=(None, f.get('ext'))),
3399 delim=', '),
3400 delim=' '),
3f698246 3401 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
ec11a9f4 3402 header_line = self._list_format_headers(
c5e3f849 3403 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3404 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
76d321f6 3405 else:
3406 table = [
3407 [
3408 format_field(f, 'format_id'),
3409 format_field(f, 'ext'),
3410 self.format_resolution(f),
3411 self._format_note(f)]
3412 for f in formats
3413 if f.get('preference') is None or f['preference'] >= -1000]
3414 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3415
169dbde9 3416 self.to_stdout(render_table(
ec11a9f4 3417 header_line, table,
c5e3f849 3418 extra_gap=(0 if new_format else 1),
3419 hide_empty=new_format,
ec11a9f4 3420 delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3421
3422 def list_thumbnails(self, info_dict):
b0249bca 3423 thumbnails = list(info_dict.get('thumbnails'))
cfb56d1a 3424 if not thumbnails:
b7b72db9 3425 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3426 return
3427
3428 self.to_screen(
3429 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3430 self.to_stdout(render_table(
ec11a9f4 3431 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
cfb56d1a 3432 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3433
360e1ca5 3434 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3435 if not subtitles:
360e1ca5 3436 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3437 return
a504ced0 3438 self.to_screen(
edab9dbf 3439 'Available %s for %s:' % (name, video_id))
2412044c 3440
3441 def _row(lang, formats):
49c258e1 3442 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3443 if len(set(names)) == 1:
7aee40c1 3444 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3445 return [lang, ', '.join(names), ', '.join(exts)]
3446
169dbde9 3447 self.to_stdout(render_table(
ec11a9f4 3448 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3449 [_row(lang, formats) for lang, formats in subtitles.items()],
c5e3f849 3450 hide_empty=True))
a504ced0 3451
3452 def urlopen(self, req):
3453 """ Start an HTTP download """
82d8a8b6 3454 if isinstance(req, compat_basestring):
67dda517 3455 req = sanitized_Request(req)
19a41fc6 3456 return self._opener.open(req, timeout=self._socket_timeout)
3457
3458 def print_debug_header(self):
3459 if not self.params.get('verbose'):
3460 return
49a57e70 3461
3462 def get_encoding(stream):
3463 ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3464 if not supports_terminal_sequences(stream):
e3c7d495 3465 from .compat import WINDOWS_VT_MODE
3466 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3467 return ret
3468
3469 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3470 locale.getpreferredencoding(),
3471 sys.getfilesystemencoding(),
3472 get_encoding(self._screen_file), get_encoding(self._err_file),
3473 self.get_encoding())
883d4b1e 3474
3475 logger = self.params.get('logger')
3476 if logger:
3477 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3478 write_debug(encoding_str)
3479 else:
96565c7e 3480 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3481 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3482
4c88ff87 3483 source = detect_variant()
36eaf303 3484 write_debug(join_nonempty(
3485 'yt-dlp version', __version__,
3486 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3487 '' if source == 'unknown' else f'({source})',
3488 delim=' '))
6e21fdd2 3489 if not _LAZY_LOADER:
3490 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3491 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3492 else:
49a57e70 3493 write_debug('Lazy loading extractors is disabled')
3ae5e797 3494 if plugin_extractors or plugin_postprocessors:
49a57e70 3495 write_debug('Plugins: %s' % [
3ae5e797 3496 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3497 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3498 if self.params.get('compat_opts'):
49a57e70 3499 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3500
3501 if source == 'source':
dca08720 3502 try:
36eaf303 3503 sp = Popen(
3504 ['git', 'rev-parse', '--short', 'HEAD'],
3505 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3506 cwd=os.path.dirname(os.path.abspath(__file__)))
3507 out, err = sp.communicate_or_kill()
3508 out = out.decode().strip()
3509 if re.match('[0-9a-f]+', out):
3510 write_debug('Git HEAD: %s' % out)
70a1165b 3511 except Exception:
36eaf303 3512 try:
3513 sys.exc_clear()
3514 except Exception:
3515 pass
3516
3517 def python_implementation():
3518 impl_name = platform.python_implementation()
3519 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3520 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3521 return impl_name
3522
49a57e70 3523 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3524 platform.python_version(),
3525 python_implementation(),
3526 platform.architecture()[0],
b300cda4 3527 platform_name()))
d28b5171 3528
8913ef74 3529 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3530 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3531 if ffmpeg_features:
a4211baf 3532 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
8913ef74 3533
4c83c967 3534 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3535 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3536 exe_str = ', '.join(
2831b468 3537 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3538 ) or 'none'
49a57e70 3539 write_debug('exe versions: %s' % exe_str)
dca08720 3540
2831b468 3541 from .downloader.websocket import has_websockets
3542 from .postprocessor.embedthumbnail import has_mutagen
3543 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3544
c586f9e8 3545 lib_str = join_nonempty(
edf65256 3546 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
c586f9e8 3547 KEYRING_AVAILABLE and 'keyring',
2831b468 3548 has_mutagen and 'mutagen',
3549 SQLITE_AVAILABLE and 'sqlite',
c586f9e8 3550 has_websockets and 'websockets',
3551 delim=', ') or 'none'
49a57e70 3552 write_debug('Optional libraries: %s' % lib_str)
2831b468 3553
3554 proxy_map = {}
3555 for handler in self._opener.handlers:
3556 if hasattr(handler, 'proxies'):
3557 proxy_map.update(handler.proxies)
49a57e70 3558 write_debug(f'Proxy map: {proxy_map}')
dca08720 3559
49a57e70 3560 # Not implemented
3561 if False and self.params.get('call_home'):
58b1f00d 3562 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3563 write_debug('Public IP address: %s' % ipaddr)
3564 latest_version = self.urlopen(
3565 'https://yt-dl.org/latest/version').read().decode('utf-8')
3566 if version_tuple(latest_version) > version_tuple(__version__):
3567 self.report_warning(
3568 'You are using an outdated version (newest version: %s)! '
3569 'See https://yt-dl.org/update if you need help updating.' %
3570 latest_version)
3571
e344693b 3572 def _setup_opener(self):
6ad14cab 3573 timeout_val = self.params.get('socket_timeout')
17bddf3e 3574 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3575
982ee69a 3576 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3577 opts_cookiefile = self.params.get('cookiefile')
3578 opts_proxy = self.params.get('proxy')
3579
982ee69a 3580 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3581
6a3f4c3f 3582 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3583 if opts_proxy is not None:
3584 if opts_proxy == '':
3585 proxies = {}
3586 else:
3587 proxies = {'http': opts_proxy, 'https': opts_proxy}
3588 else:
3589 proxies = compat_urllib_request.getproxies()
067aa17e 3590 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3591 if 'http' in proxies and 'https' not in proxies:
3592 proxies['https'] = proxies['http']
91410c9b 3593 proxy_handler = PerRequestProxyHandler(proxies)
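        # For instance (hypothetical value), passing --proxy socks5://127.0.0.1:9150 yields
        # proxies == {'http': 'socks5://127.0.0.1:9150', 'https': 'socks5://127.0.0.1:9150'},
        # which PerRequestProxyHandler then applies to every request.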
3594
3595 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3596 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3597 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3598 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3599 data_handler = compat_urllib_request_DataHandler()
3600
3601 # When passing our own FileHandler instance, build_opener won't add the
3602 # default FileHandler and allows us to disable the file protocol, which
3603 # can be used for malicious purposes (see
067aa17e 3604 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3605 file_handler = compat_urllib_request.FileHandler()
3606
3607 def file_open(*args, **kwargs):
7a5c1cfe 3608 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3609 file_handler.file_open = file_open
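        # Sketch of the effect (hypothetical call): once file_open is overridden,
        # self.urlopen('file:///etc/passwd') raises URLError instead of letting the
        # default FileHandler read local files.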
3610
3611 opener = compat_urllib_request.build_opener(
fca6dba8 3612 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3613
3614 # Delete the default user-agent header, which would otherwise apply in
3615 # cases where our custom HTTP handler doesn't come into play
067aa17e 3616 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3617 opener.addheaders = []
3618 self._opener = opener
3619
3620 def encode(self, s):
3621 if isinstance(s, bytes):
3622 return s # Already encoded
3623
3624 try:
3625 return s.encode(self.get_encoding())
3626 except UnicodeEncodeError as err:
3627 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3628 raise
3629
3630 def get_encoding(self):
3631 encoding = self.params.get('encoding')
3632 if encoding is None:
3633 encoding = preferredencoding()
3634 return encoding
ec82d85a 3635
e08a85d8 3636 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
80c03fa9 3637 ''' Write infojson and returns True = written, False = skip, None = error '''
e08a85d8 3638 if overwrite is None:
3639 overwrite = self.params.get('overwrites', True)
80c03fa9 3640 if not self.params.get('writeinfojson'):
3641 return False
3642 elif not infofn:
3643 self.write_debug(f'Skipping writing {label} infojson')
3644 return False
3645 elif not self._ensure_dir_exists(infofn):
3646 return None
e08a85d8 3647 elif not overwrite and os.path.exists(infofn):
80c03fa9 3648 self.to_screen(f'[info] {label.title()} metadata is already present')
3649 else:
3650 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3651 try:
3652 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3653 except (OSError, IOError):
3654 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3655 return None
3656 return True
3657
3658 def _write_description(self, label, ie_result, descfn):
3659 ''' Write description and returns True = written, False = skip, None = error '''
3660 if not self.params.get('writedescription'):
3661 return False
3662 elif not descfn:
3663 self.write_debug(f'Skipping writing {label} description')
3664 return False
3665 elif not self._ensure_dir_exists(descfn):
3666 return None
3667 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3668 self.to_screen(f'[info] {label.title()} description is already present')
3669 elif ie_result.get('description') is None:
3670 self.report_warning(f'There\'s no {label} description to write')
3671 return False
3672 else:
3673 try:
3674 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3675 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3676 descfile.write(ie_result['description'])
3677 except (OSError, IOError):
3678 self.report_error(f'Cannot write {label} description file {descfn}')
3679 return None
3680 return True
3681
3682 def _write_subtitles(self, info_dict, filename):
3683 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3684 ret = []
3685 subtitles = info_dict.get('requested_subtitles')
3686 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
 3687             # Subtitle download errors are already handled in the relevant IE,
 3688             # so this will silently continue when used with an IE that doesn't support them
3689 return ret
3690
3691 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3692 if not sub_filename_base:
3693 self.to_screen('[info] Skipping writing video subtitles')
3694 return ret
3695 for sub_lang, sub_info in subtitles.items():
3696 sub_format = sub_info['ext']
3697 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3698 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3699 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3700 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3701 sub_info['filepath'] = sub_filename
3702 ret.append((sub_filename, sub_filename_final))
3703 continue
3704
3705 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3706 if sub_info.get('data') is not None:
3707 try:
3708 # Use newline='' to prevent conversion of newline characters
3709 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3710 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3711 subfile.write(sub_info['data'])
3712 sub_info['filepath'] = sub_filename
3713 ret.append((sub_filename, sub_filename_final))
3714 continue
3715 except (OSError, IOError):
3716 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3717 return None
3718
3719 try:
3720 sub_copy = sub_info.copy()
3721 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3722 self.dl(sub_filename, sub_copy, subtitle=True)
3723 sub_info['filepath'] = sub_filename
3724 ret.append((sub_filename, sub_filename_final))
3725 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3726 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3727 continue
519804a9 3728 return ret
80c03fa9 3729
3730 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3731 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3732 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3733 thumbnails, ret = [], []
6c4fd172 3734 if write_all or self.params.get('writethumbnail', False):
0202b52a 3735 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3736 multiple = write_all and len(thumbnails) > 1
ec82d85a 3737
80c03fa9 3738 if thumb_filename_base is None:
3739 thumb_filename_base = filename
3740 if thumbnails and not thumb_filename_base:
3741 self.write_debug(f'Skipping writing {label} thumbnail')
3742 return ret
3743
dd0228ce 3744 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3745 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3746 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3747 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3748 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3749
80c03fa9 3750 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3751 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3752 t['filepath'] = thumb_filename
aa9369a2 3753 self.to_screen('[info] %s is already present' % (
3754 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
ec82d85a 3755 else:
80c03fa9 3756 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3757 try:
3758 uf = self.urlopen(t['url'])
80c03fa9 3759 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3760 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3761 shutil.copyfileobj(uf, thumbf)
80c03fa9 3762 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3763 t['filepath'] = thumb_filename
3158150c 3764 except network_exceptions as err:
dd0228ce 3765 thumbnails.pop(idx)
80c03fa9 3766 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3767 if ret and not write_all:
3768 break
0202b52a 3769 return ret