#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from enum import Enum
from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DownloadCancelled,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    format_decimal_suffix,
    formatSeconds,
    GeoRestrictedError,
    get_domain,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    join_nonempty,
    LazyList,
    LINK_TEMPLATES,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    number_of_digits,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    Popen,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    ReExtractInfo,
    register_socks_protocols,
    RejectedVideoReached,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .minicurses import format_text
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__, RELEASE_GIT_HEAD

if compat_os_name == 'nt':
    import ctypes

class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
    (A minimal usage sketch follows this docstring.)

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well (deprecated)
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                               pre_process|before_dl|post_process|after_move.
                               Assumed to be 'post_process' if not given
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

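    # Illustrative only (not part of the original module): a minimal sketch of the
    # embedding API described in the docstring above. The option values and the
    # example URL are arbitrary placeholders.
    #
    #   from yt_dlp import YoutubeDL
    #
    #   params = {
    #       'format': 'bestvideo+bestaudio/best',
    #       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
    #       'paths': {'home': '~/Downloads'},
    #   }
    #   with YoutubeDL(params) as ydl:
    #       ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
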
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        self._allow_colors = {
            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
                when=when)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

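    # Illustrative only (not part of the original module): a minimal sketch of a
    # download progress hook as documented under 'progress_hooks' in the class
    # docstring. Only documented dict keys are used; the hook name is hypothetical.
    #
    #   def my_hook(d):
    #       if d['status'] == 'downloading':
    #           print(d.get('filename'), d.get('downloaded_bytes'), d.get('total_bytes'))
    #       elif d['status'] == 'finished':
    #           print('Done downloading', d['filename'])
    #
    #   ydl.add_progress_hook(my_hook)   # or pass it via params['progress_hooks']
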
    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    class Styles(Enum):
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        if test_encoding:
            original_text = text
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        if isinstance(f, self.Styles):
            f = f.value
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_screen(self, *args, **kwargs):
        return self._format_text(
            self._screen_file, self._allow_colors['screen'], *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(
            self._err_file, self._allow_colors['err'], *args, **kwargs)

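    # Illustrative only (not part of the original module): how the Styles enum and
    # the _format_* helpers above combine. When color output is allowed, a call like
    #   self._format_err('WARNING:', self.Styles.WARNING)
    # returns 'WARNING:' wrapped in the terminal escape sequences for yellow text;
    # otherwise the text is returned unchanged (or the fallback, if one was given).
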
    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

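    # Illustrative only (not part of the original module): what the template
    # machinery above produces. Given a hypothetical info_dict such as
    #   {'id': 'abc123', 'title': 'Some Video', 'ext': 'mp4'}
    # evaluate_outtmpl turns an output template into the final string, e.g.
    #   self.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info_dict)
    #       -> 'Some Video [abc123].mp4'
    # while validate_outtmpl() returns None for a valid template and the
    # offending exception object otherwise (see its docstring above).
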
de6000d9 1202 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1203 try:
b836dc94 1204 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
e0fd9573 1205 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
15da37c7 1206
143db31d 1207 force_ext = OUTTMPL_TYPES.get(tmpl_type)
80c03fa9 1208 if filename and force_ext is not None:
752cda38 1209 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1210
bdc3fd2f
U
1211 # https://github.com/blackjack4494/youtube-dlc/issues/85
1212 trim_file_name = self.params.get('trim_file_name', False)
1213 if trim_file_name:
5c22c63d 1214 no_ext, *ext = filename.rsplit('.', 2)
1215 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1216
0202b52a 1217 return filename
8222d8de 1218 except ValueError as err:
6febd1c1 1219 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1220 return None
1221
de6000d9 1222 def prepare_filename(self, info_dict, dir_type='', warn=False):
1223 """Generate the output filename."""
21cd8fae 1224
de6000d9 1225 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1226 if not filename and dir_type not in ('', 'temp'):
1227 return ''
de6000d9 1228
c84aeac6 1229 if warn:
21cd8fae 1230 if not self.params.get('paths'):
de6000d9 1231 pass
1232 elif filename == '-':
c84aeac6 1233 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
de6000d9 1234 elif os.path.isabs(filename):
c84aeac6 1235 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1236 if filename == '-' or not filename:
1237 return filename
1238
21cd8fae 1239 return self.get_output_path(dir_type, filename)
0202b52a 1240
120fe513 1241 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1242 """ Returns None if the file should be downloaded """
8222d8de 1243
c77495e3 1244 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1245
8b0d7497 1246 def check_filter():
8b0d7497 1247 if 'title' in info_dict:
1248 # This can happen when we're just evaluating the playlist
1249 title = info_dict['title']
1250 matchtitle = self.params.get('matchtitle', False)
1251 if matchtitle:
1252 if not re.search(matchtitle, title, re.IGNORECASE):
1253 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1254 rejecttitle = self.params.get('rejecttitle', False)
1255 if rejecttitle:
1256 if re.search(rejecttitle, title, re.IGNORECASE):
1257 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1258 date = info_dict.get('upload_date')
1259 if date is not None:
1260 dateRange = self.params.get('daterange', DateRange())
1261 if date not in dateRange:
1262 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1263 view_count = info_dict.get('view_count')
1264 if view_count is not None:
1265 min_views = self.params.get('min_views')
1266 if min_views is not None and view_count < min_views:
1267 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1268 max_views = self.params.get('max_views')
1269 if max_views is not None and view_count > max_views:
1270 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1271 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1272 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1273
8f18aca8 1274 match_filter = self.params.get('match_filter')
1275 if match_filter is not None:
1276 try:
1277 ret = match_filter(info_dict, incomplete=incomplete)
1278 except TypeError:
1279 # For backward compatibility
1280 ret = None if incomplete else match_filter(info_dict)
1281 if ret is not None:
1282 return ret
8b0d7497 1283 return None
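# Illustrative match_filter callable (hypothetical): returning a string
# skips the entry with that reason, returning None lets it through.
#   def longer_than_an_hour(info_dict, *, incomplete=False):
#       return 'Too long' if (info_dict.get('duration') or 0) > 3600 else None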
1284
c77495e3 1285 if self.in_download_archive(info_dict):
1286 reason = '%s has already been recorded in the archive' % video_title
1287 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1288 else:
1289 reason = check_filter()
1290 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1291 if reason is not None:
120fe513 1292 if not silent:
1293 self.to_screen('[download] ' + reason)
c77495e3 1294 if self.params.get(break_opt, False):
1295 raise break_err()
8b0d7497 1296 return reason
fe7e0c98 1297
b6c45014
JMF
1298 @staticmethod
1299 def add_extra_info(info_dict, extra_info):
1300 '''Set the keys from extra_info in info dict if they are missing'''
1301 for key, value in extra_info.items():
1302 info_dict.setdefault(key, value)
1303
409e1828 1304 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1305 process=True, force_generic_extractor=False):
41d1cca3 1306 """
1307 Return the extracted information as an info dict (a single video result or a playlist result).
1308
1309 Arguments:
1310 url -- URL to extract
1311
1312 Keyword arguments:
1313 download -- whether to download videos during extraction
1314 ie_key -- extractor key hint
1315 extra_info -- dictionary containing the extra values to add to each result
1316 process -- whether to resolve all unresolved references (URLs, playlist items),
1317 must be True for download to work.
1318 force_generic_extractor -- force using the generic extractor
1319 """
fe7e0c98 1320
409e1828 1321 if extra_info is None:
1322 extra_info = {}
1323
61aa5ba3 1324 if not ie_key and force_generic_extractor:
d22dec74
S
1325 ie_key = 'Generic'
1326
8222d8de 1327 if ie_key:
8b7491c8 1328 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1329 else:
1330 ies = self._ies
1331
8b7491c8 1332 for ie_key, ie in ies.items():
8222d8de
JMF
1333 if not ie.suitable(url):
1334 continue
1335
1336 if not ie.working():
6febd1c1
PH
1337 self.report_warning('The program functionality for this site has been marked as broken, '
1338 'and will probably not work.')
8222d8de 1339
1151c407 1340 temp_id = ie.get_temp_id(url)
a0566bbf 1341 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1342 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1343 if self.params.get('break_on_existing', False):
1344 raise ExistingVideoReached()
a0566bbf 1345 break
8b7491c8 1346 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1347 else:
1348 self.report_error('no suitable InfoExtractor for URL %s' % url)
1349
8e5fecc8 1350 def __handle_extraction_exceptions(func):
b5ae35ee 1351 @functools.wraps(func)
a0566bbf 1352 def wrapper(self, *args, **kwargs):
6da22e7d 1353 while True:
1354 try:
1355 return func(self, *args, **kwargs)
1356 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1357 raise
6da22e7d 1358 except ReExtractInfo as e:
1359 if e.expected:
1360 self.to_screen(f'{e}; Re-extracting data')
1361 else:
1362 self.to_stderr('\r')
1363 self.report_warning(f'{e}; Re-extracting data')
1364 continue
1365 except GeoRestrictedError as e:
1366 msg = e.msg
1367 if e.countries:
1368 msg += '\nThis video is available in %s.' % ', '.join(
1369 map(ISO3166Utils.short2full, e.countries))
1370 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1371 self.report_error(msg)
1372 except ExtractorError as e: # An error we somewhat expected
1373 self.report_error(str(e), e.format_traceback())
1374 except Exception as e:
1375 if self.params.get('ignoreerrors'):
1376 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1377 else:
1378 raise
1379 break
a0566bbf 1380 return wrapper
1381
f2ebc5c7 1382 def _wait_for_video(self, ie_result):
1383 if (not self.params.get('wait_for_video')
1384 or ie_result.get('_type', 'video') != 'video'
1385 or ie_result.get('formats') or ie_result.get('url')):
1386 return
1387
1388 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1389 last_msg = ''
1390
1391 def progress(msg):
1392 nonlocal last_msg
1393 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1394 last_msg = msg
1395
1396 min_wait, max_wait = self.params.get('wait_for_video')
1397 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1398 if diff is None and ie_result.get('live_status') == 'is_upcoming':
38d79fd1 1399 diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
f2ebc5c7 1400 self.report_warning('Release time of video is not known')
1401 elif (diff or 0) <= 0:
1402 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1403 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1404 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1405
1406 wait_till = time.time() + diff
1407 try:
1408 while True:
1409 diff = wait_till - time.time()
1410 if diff <= 0:
1411 progress('')
1412 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1413 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1414 time.sleep(1)
1415 except KeyboardInterrupt:
1416 progress('')
1417 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1418 except BaseException as e:
1419 if not isinstance(e, ReExtractInfo):
1420 self.to_screen('')
1421 raise
1422
a0566bbf 1423 @__handle_extraction_exceptions
58f197b7 1424 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1425 ie_result = ie.extract(url)
1426 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1427 return
1428 if isinstance(ie_result, list):
1429 # Backwards compatibility: old IE result format
1430 ie_result = {
1431 '_type': 'compat_list',
1432 'entries': ie_result,
1433 }
e37d0efb 1434 if extra_info.get('original_url'):
1435 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1436 self.add_default_extra_info(ie_result, ie, url)
1437 if process:
f2ebc5c7 1438 self._wait_for_video(ie_result)
a0566bbf 1439 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1440 else:
a0566bbf 1441 return ie_result
fe7e0c98 1442
ea38e55f 1443 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1444 if url is not None:
1445 self.add_extra_info(ie_result, {
1446 'webpage_url': url,
1447 'original_url': url,
1448 'webpage_url_basename': url_basename(url),
0bb322b9 1449 'webpage_url_domain': get_domain(url),
6033d980 1450 })
1451 if ie is not None:
1452 self.add_extra_info(ie_result, {
1453 'extractor': ie.IE_NAME,
1454 'extractor_key': ie.ie_key(),
1455 })
ea38e55f 1456
58adec46 1457 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1458 """
1459 Take the result of the ie (which may be modified) and resolve all unresolved
1460 references (URLs, playlist items).
1461
1462 It will also download the videos if 'download'.
1463 Returns the resolved ie_result.
1464 """
58adec46 1465 if extra_info is None:
1466 extra_info = {}
e8ee972c
PH
1467 result_type = ie_result.get('_type', 'video')
1468
057a5206 1469 if result_type in ('url', 'url_transparent'):
134c6ea8 1470 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1471 if ie_result.get('original_url'):
1472 extra_info.setdefault('original_url', ie_result['original_url'])
1473
057a5206 1474 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1475 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1476 or extract_flat is True):
ecb54191 1477 info_copy = ie_result.copy()
6033d980 1478 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1479 if ie and not ie_result.get('id'):
4614bc22 1480 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1481 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1482 self.add_extra_info(info_copy, extra_info)
b5475f11 1483 info_copy, _ = self.pre_process(info_copy)
ecb54191 1484 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1485 if self.params.get('force_write_download_archive', False):
1486 self.record_download_archive(info_copy)
e8ee972c
PH
1487 return ie_result
1488
8222d8de 1489 if result_type == 'video':
b6c45014 1490 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1491 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1492 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1493 if additional_urls:
e9f4ccd1 1494 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1495 if isinstance(additional_urls, compat_str):
1496 additional_urls = [additional_urls]
1497 self.to_screen(
1498 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1499 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1500 ie_result['additional_entries'] = [
1501 self.extract_info(
b69fd25c 1502 url, download, extra_info=extra_info,
9c2b75b5 1503 force_generic_extractor=self.params.get('force_generic_extractor'))
1504 for url in additional_urls
1505 ]
1506 return ie_result
8222d8de
JMF
1507 elif result_type == 'url':
1508 # We have to add extra_info to the results because it may be
1509 # contained in a playlist
07cce701 1510 return self.extract_info(
1511 ie_result['url'], download,
1512 ie_key=ie_result.get('ie_key'),
1513 extra_info=extra_info)
7fc3fa05
PH
1514 elif result_type == 'url_transparent':
1515 # Use the information from the embedding page
1516 info = self.extract_info(
1517 ie_result['url'], ie_key=ie_result.get('ie_key'),
1518 extra_info=extra_info, download=False, process=False)
1519
1640eb09
S
1520 # extract_info may return None when ignoreerrors is enabled and
1521 # extraction failed with an error, don't crash and return early
1522 # in this case
1523 if not info:
1524 return info
1525
412c617d
PH
1526 force_properties = dict(
1527 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1528 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1529 if f in force_properties:
1530 del force_properties[f]
1531 new_result = info.copy()
1532 new_result.update(force_properties)
7fc3fa05 1533
0563f7ac
S
1534 # Extracted info may not be a video result (i.e.
1535 # info.get('_type', 'video') != 'video') but rather a URL or
1536 # url_transparent. In such cases outer metadata (from ie_result)
1537 # should be propagated to the inner one (info). For this to happen,
1538 # _type of info should be overridden with url_transparent. This
067aa17e 1539 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1540 if new_result.get('_type') == 'url':
1541 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1542
1543 return self.process_ie_result(
1544 new_result, download=download, extra_info=extra_info)
40fcba5e 1545 elif result_type in ('playlist', 'multi_video'):
30a074c2 1546 # Protect from infinite recursion due to recursively nested playlists
1547 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1548 webpage_url = ie_result['webpage_url']
1549 if webpage_url in self._playlist_urls:
7e85e872 1550 self.to_screen(
30a074c2 1551 '[download] Skipping already downloaded playlist: %s'
1552 % (ie_result.get('title') or ie_result.get('id')))
1553 return
7e85e872 1554
30a074c2 1555 self._playlist_level += 1
1556 self._playlist_urls.add(webpage_url)
bc516a3f 1557 self._sanitize_thumbnails(ie_result)
30a074c2 1558 try:
1559 return self.__process_playlist(ie_result, download)
1560 finally:
1561 self._playlist_level -= 1
1562 if not self._playlist_level:
1563 self._playlist_urls.clear()
8222d8de 1564 elif result_type == 'compat_list':
c9bf4114
PH
1565 self.report_warning(
1566 'Extractor %s returned a compat_list result. '
1567 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1568
8222d8de 1569 def _fixup(r):
b868936c 1570 self.add_extra_info(r, {
1571 'extractor': ie_result['extractor'],
1572 'webpage_url': ie_result['webpage_url'],
1573 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1574 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1575 'extractor_key': ie_result['extractor_key'],
1576 })
8222d8de
JMF
1577 return r
1578 ie_result['entries'] = [
b6c45014 1579 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1580 for r in ie_result['entries']
1581 ]
1582 return ie_result
1583 else:
1584 raise Exception('Invalid result type: %s' % result_type)
1585
e92caff5 1586 def _ensure_dir_exists(self, path):
1587 return make_dir(path, self.report_error)
1588
30a074c2 1589 def __process_playlist(self, ie_result, download):
1590 # We process each entry in the playlist
1591 playlist = ie_result.get('title') or ie_result.get('id')
1592 self.to_screen('[download] Downloading playlist: %s' % playlist)
1593
498f5606 1594 if 'entries' not in ie_result:
aa9369a2 1595 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1596
1597 MissingEntry = object()
498f5606 1598 incomplete_entries = bool(ie_result.get('requested_entries'))
1599 if incomplete_entries:
bf5f605e 1600 def fill_missing_entries(entries, indices):
7c7f7161 1601 ret = [MissingEntry] * max(indices)
bf5f605e 1602 for i, entry in zip(indices, entries):
498f5606 1603 ret[i - 1] = entry
1604 return ret
1605 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1606
30a074c2 1607 playlist_results = []
1608
56a8fb4f 1609 playliststart = self.params.get('playliststart', 1)
30a074c2 1610 playlistend = self.params.get('playlistend')
1611 # For backwards compatibility, interpret -1 as whole list
1612 if playlistend == -1:
1613 playlistend = None
1614
1615 playlistitems_str = self.params.get('playlist_items')
1616 playlistitems = None
1617 if playlistitems_str is not None:
1618 def iter_playlistitems(format):
1619 for string_segment in format.split(','):
1620 if '-' in string_segment:
1621 start, end = string_segment.split('-')
1622 for item in range(int(start), int(end) + 1):
1623 yield int(item)
1624 else:
1625 yield int(string_segment)
1626 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
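# For example, a playlist_items value of '1-3,7' (hypothetical) expands
# to the ordered set [1, 2, 3, 7] here.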
1627
1628 ie_entries = ie_result['entries']
56a8fb4f 1629 msg = (
1630 'Downloading %d videos' if not isinstance(ie_entries, list)
1631 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1632
1633 if isinstance(ie_entries, list):
1634 def get_entry(i):
1635 return ie_entries[i - 1]
1636 else:
c586f9e8 1637 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1638 ie_entries = LazyList(ie_entries)
1639
1640 def get_entry(i):
1641 return YoutubeDL.__handle_extraction_exceptions(
1642 lambda self, i: ie_entries[i - 1]
1643 )(self, i)
50fed816 1644
56a8fb4f 1645 entries = []
ff1c7fc9 1646 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1647 for i in items:
1648 if i == 0:
1649 continue
56a8fb4f 1650 if playlistitems is None and playlistend is not None and playlistend < i:
1651 break
1652 entry = None
1653 try:
50fed816 1654 entry = get_entry(i)
7c7f7161 1655 if entry is MissingEntry:
498f5606 1656 raise EntryNotInPlaylist()
56a8fb4f 1657 except (IndexError, EntryNotInPlaylist):
1658 if incomplete_entries:
aa9369a2 1659 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1660 elif not playlistitems:
1661 break
1662 entries.append(entry)
120fe513 1663 try:
1664 if entry is not None:
1665 self._match_entry(entry, incomplete=True, silent=True)
1666 except (ExistingVideoReached, RejectedVideoReached):
1667 break
56a8fb4f 1668 ie_result['entries'] = entries
30a074c2 1669
56a8fb4f 1670 # Save playlist_index before re-ordering
1671 entries = [
9e598870 1672 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1673 for i, entry in enumerate(entries, 1)
1674 if entry is not None]
1675 n_entries = len(entries)
498f5606 1676
e08a85d8 1677 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1678 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1679 ie_result['requested_entries'] = playlistitems
1680
e08a85d8 1681 _infojson_written = False
49a57e70 1682 if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
498f5606 1683 ie_copy = {
1684 'playlist': playlist,
1685 'playlist_id': ie_result.get('id'),
1686 'playlist_title': ie_result.get('title'),
1687 'playlist_uploader': ie_result.get('uploader'),
1688 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1689 'playlist_index': 0,
49a57e70 1690 'n_entries': n_entries,
498f5606 1691 }
1692 ie_copy.update(dict(ie_result))
1693
e08a85d8 1694 _infojson_written = self._write_info_json(
1695 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1696 if _infojson_written is None:
80c03fa9 1697 return
1698 if self._write_description('playlist', ie_result,
1699 self.prepare_filename(ie_copy, 'pl_description')) is None:
1700 return
681de68e 1701 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1702 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1703
1704 if self.params.get('playlistreverse', False):
1705 entries = entries[::-1]
30a074c2 1706 if self.params.get('playlistrandom', False):
1707 random.shuffle(entries)
1708
1709 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1710
56a8fb4f 1711 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1712 failures = 0
1713 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1714 for i, entry_tuple in enumerate(entries, 1):
1715 playlist_index, entry = entry_tuple
81139999 1716 if 'playlist-index' in self.params.get('compat_opts', []):
1717 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1718 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1719 # This __x_forwarded_for_ip thing is a bit ugly but requires
1720 # minimal changes
1721 if x_forwarded_for:
1722 entry['__x_forwarded_for_ip'] = x_forwarded_for
1723 extra = {
1724 'n_entries': n_entries,
f59ae581 1725 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1726 'playlist_index': playlist_index,
1727 'playlist_autonumber': i,
30a074c2 1728 'playlist': playlist,
1729 'playlist_id': ie_result.get('id'),
1730 'playlist_title': ie_result.get('title'),
1731 'playlist_uploader': ie_result.get('uploader'),
1732 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1733 'extractor': ie_result['extractor'],
1734 'webpage_url': ie_result['webpage_url'],
1735 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1736 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1737 'extractor_key': ie_result['extractor_key'],
1738 }
1739
1740 if self._match_entry(entry, incomplete=True) is not None:
1741 continue
1742
1743 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1744 if not entry_result:
1745 failures += 1
1746 if failures >= max_failures:
1747 self.report_error(
1748 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1749 break
30a074c2 1750 playlist_results.append(entry_result)
1751 ie_result['entries'] = playlist_results
e08a85d8 1752
1753 # Write the updated info to json
1754 if _infojson_written and self._write_info_json(
1755 'updated playlist', ie_result,
1756 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1757 return
30a074c2 1758 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1759 return ie_result
1760
a0566bbf 1761 @__handle_extraction_exceptions
1762 def __process_iterable_entry(self, entry, download, extra_info):
1763 return self.process_ie_result(
1764 entry, download=download, extra_info=extra_info)
1765
67134eab
JMF
1766 def _build_format_filter(self, filter_spec):
1767 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1768
1769 OPERATORS = {
1770 '<': operator.lt,
1771 '<=': operator.le,
1772 '>': operator.gt,
1773 '>=': operator.ge,
1774 '=': operator.eq,
1775 '!=': operator.ne,
1776 }
67134eab 1777 operator_rex = re.compile(r'''(?x)\s*
187986a8 1778 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1779 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1780 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1781 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1782 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1783 if m:
1784 try:
1785 comparison_value = int(m.group('value'))
1786 except ValueError:
1787 comparison_value = parse_filesize(m.group('value'))
1788 if comparison_value is None:
1789 comparison_value = parse_filesize(m.group('value') + 'B')
1790 if comparison_value is None:
1791 raise ValueError(
1792 'Invalid value %r in format specification %r' % (
67134eab 1793 m.group('value'), filter_spec))
9ddb6925
S
1794 op = OPERATORS[m.group('op')]
1795
083c9df9 1796 if not m:
9ddb6925
S
1797 STR_OPERATORS = {
1798 '=': operator.eq,
10d33b34
YCH
1799 '^=': lambda attr, value: attr.startswith(value),
1800 '$=': lambda attr, value: attr.endswith(value),
1801 '*=': lambda attr, value: value in attr,
9ddb6925 1802 }
187986a8 1803 str_operator_rex = re.compile(r'''(?x)\s*
1804 (?P<key>[a-zA-Z0-9._-]+)\s*
1805 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1806 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1807 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1808 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1809 if m:
1810 comparison_value = m.group('value')
2cc779f4
S
1811 str_op = STR_OPERATORS[m.group('op')]
1812 if m.group('negation'):
e118a879 1813 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1814 else:
1815 op = str_op
083c9df9 1816
9ddb6925 1817 if not m:
187986a8 1818 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1819
1820 def _filter(f):
1821 actual_value = f.get(m.group('key'))
1822 if actual_value is None:
1823 return m.group('none_inclusive')
1824 return op(actual_value, comparison_value)
67134eab
JMF
1825 return _filter
1826
9f1a1c36 1827 def _check_formats(self, formats):
1828 for f in formats:
1829 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1830 path = self.get_output_path('temp')
1831 if not self._ensure_dir_exists(f'{path}/'):
1832 continue
1833 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1834 temp_file.close()
1835 try:
1836 success, _ = self.dl(temp_file.name, f, test=True)
1837 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1838 success = False
1839 finally:
1840 if os.path.exists(temp_file.name):
1841 try:
1842 os.remove(temp_file.name)
1843 except OSError:
1844 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1845 if success:
1846 yield f
1847 else:
1848 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1849
0017d9ad 1850 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1851
af0f7428
S
1852 def can_merge():
1853 merger = FFmpegMergerPP(self)
1854 return merger.available and merger.can_merge()
1855
91ebc640 1856 prefer_best = (
b7b04c78 1857 not self.params.get('simulate')
91ebc640 1858 and download
1859 and (
1860 not can_merge()
19807826 1861 or info_dict.get('is_live', False)
de6000d9 1862 or self.outtmpl_dict['default'] == '-'))
53ed7066 1863 compat = (
1864 prefer_best
1865 or self.params.get('allow_multiple_audio_streams', False)
1866 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1867
1868 return (
53ed7066 1869 'best/bestvideo+bestaudio' if prefer_best
1870 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1871 else 'bestvideo+bestaudio/best')
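# In practice (summary of the logic above): downloads that cannot be merged,
# go to stdout, or are live streams fall back to 'best/bestvideo+bestaudio';
# otherwise 'bestvideo*+bestaudio/best' is used, unless a compatibility
# option forces 'bestvideo+bestaudio/best'.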
0017d9ad 1872
67134eab
JMF
1873 def build_format_selector(self, format_spec):
1874 def syntax_error(note, start):
1875 message = (
1876 'Invalid format specification: '
1877 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1878 return SyntaxError(message)
1879
1880 PICKFIRST = 'PICKFIRST'
1881 MERGE = 'MERGE'
1882 SINGLE = 'SINGLE'
0130afb7 1883 GROUP = 'GROUP'
67134eab
JMF
1884 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1885
91ebc640 1886 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1887 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1888
9f1a1c36 1889 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1890
67134eab
JMF
1891 def _parse_filter(tokens):
1892 filter_parts = []
1893 for type, string, start, _, _ in tokens:
1894 if type == tokenize.OP and string == ']':
1895 return ''.join(filter_parts)
1896 else:
1897 filter_parts.append(string)
1898
232541df 1899 def _remove_unused_ops(tokens):
17cc1534 1900 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1901 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1902 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1903 last_string, last_start, last_end, last_line = None, None, None, None
1904 for type, string, start, end, line in tokens:
1905 if type == tokenize.OP and string == '[':
1906 if last_string:
1907 yield tokenize.NAME, last_string, last_start, last_end, last_line
1908 last_string = None
1909 yield type, string, start, end, line
1910 # everything inside brackets will be handled by _parse_filter
1911 for type, string, start, end, line in tokens:
1912 yield type, string, start, end, line
1913 if type == tokenize.OP and string == ']':
1914 break
1915 elif type == tokenize.OP and string in ALLOWED_OPS:
1916 if last_string:
1917 yield tokenize.NAME, last_string, last_start, last_end, last_line
1918 last_string = None
1919 yield type, string, start, end, line
1920 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1921 if not last_string:
1922 last_string = string
1923 last_start = start
1924 last_end = end
1925 else:
1926 last_string += string
1927 if last_string:
1928 yield tokenize.NAME, last_string, last_start, last_end, last_line
1929
cf2ac6df 1930 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1931 selectors = []
1932 current_selector = None
1933 for type, string, start, _, _ in tokens:
1934 # ENCODING is only defined in python 3.x
1935 if type == getattr(tokenize, 'ENCODING', None):
1936 continue
1937 elif type in [tokenize.NAME, tokenize.NUMBER]:
1938 current_selector = FormatSelector(SINGLE, string, [])
1939 elif type == tokenize.OP:
cf2ac6df
JMF
1940 if string == ')':
1941 if not inside_group:
1942 # ')' will be handled by the parentheses group
1943 tokens.restore_last_token()
67134eab 1944 break
cf2ac6df 1945 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1946 tokens.restore_last_token()
1947 break
cf2ac6df
JMF
1948 elif inside_choice and string == ',':
1949 tokens.restore_last_token()
1950 break
1951 elif string == ',':
0a31a350
JMF
1952 if not current_selector:
1953 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1954 selectors.append(current_selector)
1955 current_selector = None
1956 elif string == '/':
d96d604e
JMF
1957 if not current_selector:
1958 raise syntax_error('"/" must follow a format selector', start)
67134eab 1959 first_choice = current_selector
cf2ac6df 1960 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1961 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1962 elif string == '[':
1963 if not current_selector:
1964 current_selector = FormatSelector(SINGLE, 'best', [])
1965 format_filter = _parse_filter(tokens)
1966 current_selector.filters.append(format_filter)
0130afb7
JMF
1967 elif string == '(':
1968 if current_selector:
1969 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1970 group = _parse_format_selection(tokens, inside_group=True)
1971 current_selector = FormatSelector(GROUP, group, [])
67134eab 1972 elif string == '+':
d03cfdce 1973 if not current_selector:
1974 raise syntax_error('Unexpected "+"', start)
1975 selector_1 = current_selector
1976 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1977 if not selector_2:
1978 raise syntax_error('Expected a selector', start)
1979 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1980 else:
1981 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1982 elif type == tokenize.ENDMARKER:
1983 break
1984 if current_selector:
1985 selectors.append(current_selector)
1986 return selectors
1987
f8d4ad9a 1988 def _merge(formats_pair):
1989 format_1, format_2 = formats_pair
1990
1991 formats_info = []
1992 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1993 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1994
1995 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1996 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1997 for (i, fmt_info) in enumerate(formats_info):
551f9388 1998 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1999 formats_info.pop(i)
2000 continue
2001 for aud_vid in ['audio', 'video']:
f8d4ad9a 2002 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2003 if get_no_more[aud_vid]:
2004 formats_info.pop(i)
f5510afe 2005 break
f8d4ad9a 2006 get_no_more[aud_vid] = True
2007
2008 if len(formats_info) == 1:
2009 return formats_info[0]
2010
2011 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2012 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2013
2014 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2015 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2016
2017 output_ext = self.params.get('merge_output_format')
2018 if not output_ext:
2019 if the_only_video:
2020 output_ext = the_only_video['ext']
2021 elif the_only_audio and not video_fmts:
2022 output_ext = the_only_audio['ext']
2023 else:
2024 output_ext = 'mkv'
2025
975a0d0d 2026 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2027
f8d4ad9a 2028 new_dict = {
2029 'requested_formats': formats_info,
975a0d0d 2030 'format': '+'.join(filtered('format')),
2031 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2032 'ext': output_ext,
975a0d0d 2033 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2034 'language': '+'.join(orderedSet(filtered('language'))) or None,
2035 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2036 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2037 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2038 }
2039
2040 if the_only_video:
2041 new_dict.update({
2042 'width': the_only_video.get('width'),
2043 'height': the_only_video.get('height'),
2044 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2045 'fps': the_only_video.get('fps'),
49a57e70 2046 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2047 'vcodec': the_only_video.get('vcodec'),
2048 'vbr': the_only_video.get('vbr'),
2049 'stretched_ratio': the_only_video.get('stretched_ratio'),
2050 })
2051
2052 if the_only_audio:
2053 new_dict.update({
2054 'acodec': the_only_audio.get('acodec'),
2055 'abr': the_only_audio.get('abr'),
975a0d0d 2056 'asr': the_only_audio.get('asr'),
f8d4ad9a 2057 })
2058
2059 return new_dict
2060
e8e73840 2061 def _check_formats(formats):
981052c9 2062 if not check_formats:
2063 yield from formats
b5ac45b1 2064 return
9f1a1c36 2065 yield from self._check_formats(formats)
e8e73840 2066
67134eab 2067 def _build_selector_function(selector):
909d24dd 2068 if isinstance(selector, list): # ,
67134eab
JMF
2069 fs = [_build_selector_function(s) for s in selector]
2070
317f7ab6 2071 def selector_function(ctx):
67134eab 2072 for f in fs:
981052c9 2073 yield from f(ctx)
67134eab 2074 return selector_function
909d24dd 2075
2076 elif selector.type == GROUP: # ()
0130afb7 2077 selector_function = _build_selector_function(selector.selector)
909d24dd 2078
2079 elif selector.type == PICKFIRST: # /
67134eab
JMF
2080 fs = [_build_selector_function(s) for s in selector.selector]
2081
317f7ab6 2082 def selector_function(ctx):
67134eab 2083 for f in fs:
317f7ab6 2084 picked_formats = list(f(ctx))
67134eab
JMF
2085 if picked_formats:
2086 return picked_formats
2087 return []
67134eab 2088
981052c9 2089 elif selector.type == MERGE: # +
2090 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2091
2092 def selector_function(ctx):
adbc4ec4 2093 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2094 yield _merge(pair)
2095
909d24dd 2096 elif selector.type == SINGLE: # atom
598d185d 2097 format_spec = selector.selector or 'best'
909d24dd 2098
f8d4ad9a 2099 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2100 if format_spec == 'all':
2101 def selector_function(ctx):
9222c381 2102 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2103 elif format_spec == 'mergeall':
2104 def selector_function(ctx):
dd2a987d 2105 formats = list(_check_formats(ctx['formats']))
e01d6aa4 2106 if not formats:
2107 return
921b76ca 2108 merged_format = formats[-1]
2109 for f in formats[-2::-1]:
f8d4ad9a 2110 merged_format = _merge((merged_format, f))
2111 yield merged_format
909d24dd 2112
2113 else:
e8e73840 2114 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 2115 mobj = re.match(
2116 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2117 format_spec)
2118 if mobj is not None:
2119 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2120 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2121 format_type = (mobj.group('type') or [None])[0]
2122 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2123 format_modified = mobj.group('mod') is not None
909d24dd 2124
2125 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2126 _filter_f = (
eff63539 2127 (lambda f: f.get('%scodec' % format_type) != 'none')
2128 if format_type and format_modified # bv*, ba*, wv*, wa*
2129 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2130 if format_type # bv, ba, wv, wa
2131 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2132 if not format_modified # b, w
8326b00a 2133 else lambda f: True) # b*, w*
2134 filter_f = lambda f: _filter_f(f) and (
2135 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2136 else:
48ee10ee 2137 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2138 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2139 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2140 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
48ee10ee 2141 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2142 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2143 else:
b5ae35ee 2144 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2145
2146 def selector_function(ctx):
2147 formats = list(ctx['formats'])
909d24dd 2148 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 2149 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 2150 # for extractors with incomplete formats (audio only (soundcloud)
2151 # or video only (imgur)) best/worst will fall back to
2152 # best/worst {video,audio}-only format
e8e73840 2153 matches = formats
981052c9 2154 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2155 try:
e8e73840 2156 yield matches[format_idx - 1]
981052c9 2157 except IndexError:
2158 return
083c9df9 2159
67134eab 2160 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2161
317f7ab6 2162 def final_selector(ctx):
adbc4ec4 2163 ctx_copy = dict(ctx)
67134eab 2164 for _filter in filters:
317f7ab6
S
2165 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2166 return selector_function(ctx_copy)
67134eab 2167 return final_selector
083c9df9 2168
67134eab 2169 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2170 try:
232541df 2171 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2172 except tokenize.TokenError:
2173 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2174
2175 class TokenIterator(object):
2176 def __init__(self, tokens):
2177 self.tokens = tokens
2178 self.counter = 0
2179
2180 def __iter__(self):
2181 return self
2182
2183 def __next__(self):
2184 if self.counter >= len(self.tokens):
2185 raise StopIteration()
2186 value = self.tokens[self.counter]
2187 self.counter += 1
2188 return value
2189
2190 next = __next__
2191
2192 def restore_last_token(self):
2193 self.counter -= 1
2194
2195 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2196 return _build_selector_function(parsed_selector)
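# Illustrative use (format list is hypothetical): the returned selector is
# called with a context dict, e.g.
#   selector = self.build_format_selector('bestvideo+bestaudio/best')
#   chosen = list(selector({'formats': formats, 'incomplete_formats': False}))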
a9c58ad9 2197
e5660ee6
JMF
2198 def _calc_headers(self, info_dict):
2199 res = std_headers.copy()
2200
2201 add_headers = info_dict.get('http_headers')
2202 if add_headers:
2203 res.update(add_headers)
2204
2205 cookies = self._calc_cookies(info_dict)
2206 if cookies:
2207 res['Cookie'] = cookies
2208
0016b84e
S
2209 if 'X-Forwarded-For' not in res:
2210 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2211 if x_forwarded_for_ip:
2212 res['X-Forwarded-For'] = x_forwarded_for_ip
2213
e5660ee6
JMF
2214 return res
2215
2216 def _calc_cookies(self, info_dict):
5c2266df 2217 pr = sanitized_Request(info_dict['url'])
e5660ee6 2218 self.cookiejar.add_cookie_header(pr)
662435f7 2219 return pr.get_header('Cookie')
e5660ee6 2220
9f1a1c36 2221 def _sort_thumbnails(self, thumbnails):
2222 thumbnails.sort(key=lambda t: (
2223 t.get('preference') if t.get('preference') is not None else -1,
2224 t.get('width') if t.get('width') is not None else -1,
2225 t.get('height') if t.get('height') is not None else -1,
2226 t.get('id') if t.get('id') is not None else '',
2227 t.get('url')))
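# Note: the ascending sort above leaves the most preferred / largest
# thumbnail last, which is why thumbnails[-1] is used as the fallback
# 'thumbnail' value further below.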
2228
b0249bca 2229 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2230 thumbnails = info_dict.get('thumbnails')
2231 if thumbnails is None:
2232 thumbnail = info_dict.get('thumbnail')
2233 if thumbnail:
2234 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2235 if not thumbnails:
2236 return
2237
2238 def check_thumbnails(thumbnails):
2239 for t in thumbnails:
2240 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2241 try:
2242 self.urlopen(HEADRequest(t['url']))
2243 except network_exceptions as err:
2244 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2245 continue
2246 yield t
2247
2248 self._sort_thumbnails(thumbnails)
2249 for i, t in enumerate(thumbnails):
2250 if t.get('id') is None:
2251 t['id'] = '%d' % i
2252 if t.get('width') and t.get('height'):
2253 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2254 t['url'] = sanitize_url(t['url'])
2255
2256 if self.params.get('check_formats') is True:
282f5709 2257 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2258 else:
2259 info_dict['thumbnails'] = thumbnails
bc516a3f 2260
dd82ffea
JMF
2261 def process_video_result(self, info_dict, download=True):
2262 assert info_dict.get('_type', 'video') == 'video'
2263
bec1fad2
PH
2264 if 'id' not in info_dict:
2265 raise ExtractorError('Missing "id" field in extractor result')
2266 if 'title' not in info_dict:
1151c407 2267 raise ExtractorError('Missing "title" field in extractor result',
2268 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2269
c9969434
S
2270 def report_force_conversion(field, field_not, conversion):
2271 self.report_warning(
2272 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2273 % (field, field_not, conversion))
2274
2275 def sanitize_string_field(info, string_field):
2276 field = info.get(string_field)
2277 if field is None or isinstance(field, compat_str):
2278 return
2279 report_force_conversion(string_field, 'a string', 'string')
2280 info[string_field] = compat_str(field)
2281
2282 def sanitize_numeric_fields(info):
2283 for numeric_field in self._NUMERIC_FIELDS:
2284 field = info.get(numeric_field)
2285 if field is None or isinstance(field, compat_numeric_types):
2286 continue
2287 report_force_conversion(numeric_field, 'numeric', 'int')
2288 info[numeric_field] = int_or_none(field)
2289
2290 sanitize_string_field(info_dict, 'id')
2291 sanitize_numeric_fields(info_dict)
be6217b2 2292
dd82ffea
JMF
2293 if 'playlist' not in info_dict:
2294 # It isn't part of a playlist
2295 info_dict['playlist'] = None
2296 info_dict['playlist_index'] = None
2297
bc516a3f 2298 self._sanitize_thumbnails(info_dict)
d5519808 2299
536a55da 2300 thumbnail = info_dict.get('thumbnail')
bc516a3f 2301 thumbnails = info_dict.get('thumbnails')
536a55da
S
2302 if thumbnail:
2303 info_dict['thumbnail'] = sanitize_url(thumbnail)
2304 elif thumbnails:
d5519808
PH
2305 info_dict['thumbnail'] = thumbnails[-1]['url']
2306
ae30b840 2307 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2308 info_dict['display_id'] = info_dict['id']
2309
239df021 2310 if info_dict.get('duration') is not None:
2311 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2312
10db0d2f 2313 for ts_key, date_key in (
2314 ('timestamp', 'upload_date'),
2315 ('release_timestamp', 'release_date'),
2316 ):
2317 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2318 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2319 # see http://bugs.python.org/issue1646728)
2320 try:
2321 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2322 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2323 except (ValueError, OverflowError, OSError):
2324 pass
9d2ecdbc 2325
ae30b840 2326 live_keys = ('is_live', 'was_live')
2327 live_status = info_dict.get('live_status')
2328 if live_status is None:
2329 for key in live_keys:
2330 if info_dict.get(key) is False:
2331 continue
2332 if info_dict.get(key):
2333 live_status = key
2334 break
2335 if all(info_dict.get(key) is False for key in live_keys):
2336 live_status = 'not_live'
2337 if live_status:
2338 info_dict['live_status'] = live_status
2339 for key in live_keys:
2340 if info_dict.get(key) is None:
2341 info_dict[key] = (live_status == key)
2342
33d2fc2f
S
2343 # Auto generate title fields corresponding to the *_number fields when missing
2344 # in order to always have clean titles. This is very common for TV series.
2345 for field in ('chapter', 'season', 'episode'):
2346 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2347 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2348
05108a49
S
2349 for cc_kind in ('subtitles', 'automatic_captions'):
2350 cc = info_dict.get(cc_kind)
2351 if cc:
2352 for _, subtitle in cc.items():
2353 for subtitle_format in subtitle:
2354 if subtitle_format.get('url'):
2355 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2356 if subtitle_format.get('ext') is None:
2357 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2358
2359 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2360 subtitles = info_dict.get('subtitles')
4bba3716 2361
360e1ca5 2362 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2363 info_dict['id'], subtitles, automatic_captions)
a504ced0 2364
dd82ffea
JMF
2365 if info_dict.get('formats') is None:
2366 # There's only one format available
2367 formats = [info_dict]
2368 else:
2369 formats = info_dict['formats']
2370
e0493e90 2371 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2372 if not self.params.get('allow_unplayable_formats'):
2373 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2374
adbc4ec4
THD
2375 if info_dict.get('is_live'):
2376 get_from_start = bool(self.params.get('live_from_start'))
2377 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
4c922dd3 2378 if not get_from_start:
2379 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
adbc4ec4 2380
db95dc13 2381 if not formats:
1151c407 2382 self.raise_no_formats(info_dict)
db95dc13 2383
73af5cc8
S
2384 def is_wellformed(f):
2385 url = f.get('url')
a5ac0c47 2386 if not url:
73af5cc8
S
2387 self.report_warning(
2388 '"url" field is missing or empty - skipping format, '
2389 'there is an error in extractor')
a5ac0c47
S
2390 return False
2391 if isinstance(url, bytes):
2392 sanitize_string_field(f, 'url')
2393 return True
73af5cc8
S
2394
2395 # Filter out malformed formats for better extraction robustness
2396 formats = list(filter(is_wellformed, formats))
2397
181c7053
S
2398 formats_dict = {}
2399
dd82ffea 2400 # We check that all the formats have the format and format_id fields
db95dc13 2401 for i, format in enumerate(formats):
c9969434
S
2402 sanitize_string_field(format, 'format_id')
2403 sanitize_numeric_fields(format)
dcf77cf1 2404 format['url'] = sanitize_url(format['url'])
e74e3b63 2405 if not format.get('format_id'):
8016c922 2406 format['format_id'] = compat_str(i)
e2effb08
S
2407 else:
2408 # Sanitize format_id from characters used in format selector expression
ec85ded8 2409 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2410 format_id = format['format_id']
2411 if format_id not in formats_dict:
2412 formats_dict[format_id] = []
2413 formats_dict[format_id].append(format)
2414
2415 # Make sure all formats have unique format_id
03b4de72 2416 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2417 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2418 ambiguous_id = len(ambiguous_formats) > 1
2419 for i, format in enumerate(ambiguous_formats):
2420 if ambiguous_id:
181c7053 2421 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2422 if format.get('ext') is None:
2423 format['ext'] = determine_ext(format['url']).lower()
2424 # Ensure there is no conflict between id and ext in format selection
2425 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2426 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2427 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2428
2429 for i, format in enumerate(formats):
8c51aa65 2430 if format.get('format') is None:
6febd1c1 2431 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2432 id=format['format_id'],
2433 res=self.format_resolution(format),
b868936c 2434 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2435 )
6f0be937 2436 if format.get('protocol') is None:
b5559424 2437 format['protocol'] = determine_protocol(format)
239df021 2438 if format.get('resolution') is None:
2439 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2440 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2441 format['dynamic_range'] = 'SDR'
f2fe69c7 2442 if (info_dict.get('duration') and format.get('tbr')
2443 and not format.get('filesize') and not format.get('filesize_approx')):
2444 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2445
e5660ee6
JMF
2446 # Add HTTP headers, so that external programs can use them from the
2447 # json output
2448 full_format_info = info_dict.copy()
2449 full_format_info.update(format)
2450 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2451 # Remove private housekeeping stuff
2452 if '__x_forwarded_for_ip' in info_dict:
2453 del info_dict['__x_forwarded_for_ip']
dd82ffea 2454
4bcc7bd1 2455 # TODO Central sorting goes here
99e206d5 2456
9f1a1c36 2457 if self.params.get('check_formats') is True:
282f5709 2458 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2459
88acdbc2 2460 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2461 # only set the 'formats' field if the original info_dict lists them
2462 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2463 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2464 # which can't be exported to json
b3d9ef88 2465 info_dict['formats'] = formats
4ec82a72 2466
2467 info_dict, _ = self.pre_process(info_dict)
2468
093a1710 2469 # The pre-processors may have modified the formats
2470 formats = info_dict.get('formats', [info_dict])
2471
fa9f30b8 2472 list_only = self.params.get('simulate') is None and (
2473 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2474 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2475 if self.params.get('list_thumbnails'):
2476 self.list_thumbnails(info_dict)
b7b04c78 2477 if self.params.get('listsubtitles'):
2478 if 'automatic_captions' in info_dict:
2479 self.list_subtitles(
2480 info_dict['id'], automatic_captions, 'automatic captions')
2481 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2482 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2483 self.list_formats(info_dict)
169dbde9 2484 if list_only:
b7b04c78 2485 # Without this printing, -F --print-json will not work
169dbde9 2486 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2487 return
2488
187986a8 2489 format_selector = self.format_selector
2490 if format_selector is None:
0017d9ad 2491 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2492 self.write_debug('Default format spec: %s' % req_format)
187986a8 2493 format_selector = self.build_format_selector(req_format)
317f7ab6 2494
fa9f30b8 2495 while True:
2496 if interactive_format_selection:
2497 req_format = input(
2498 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2499 try:
2500 format_selector = self.build_format_selector(req_format)
2501 except SyntaxError as err:
2502 self.report_error(err, tb=False, is_error=False)
2503 continue
2504
2505 # While in format selection we may need to have access to the original
2506 # format set in order to calculate some metrics or do some processing.
2507 # For now we need to be able to guess whether the original formats provided
2508 # by the extractor are incomplete or not (i.e. whether the extractor provides
2509 # only video-only or audio-only formats) for proper format selection for
2510 # extractors with such incomplete formats (see
2511 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2512 # Since formats may be filtered during format selection and may not match
2513 # the original formats, the results may be incorrect. Thus original formats
2514 # or pre-calculated metrics should be passed to the format selection routines
2515 # as well.
2516 # We will pass a context object containing all necessary additional data
2517 # instead of just formats.
2518 # This fixes the incorrect format selection issue (see
2519 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2520 incomplete_formats = (
2521 # All formats are video-only or
2522 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2523 # all formats are audio-only
2524 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2525
2526 ctx = {
2527 'formats': formats,
2528 'incomplete_formats': incomplete_formats,
2529 }
2530
2531 formats_to_download = list(format_selector(ctx))
2532 if interactive_format_selection and not formats_to_download:
2533 self.report_error('Requested format is not available', tb=False, is_error=False)
2534 continue
2535 break
317f7ab6 2536
dd82ffea 2537 if not formats_to_download:
b7da73eb 2538 if not self.params.get('ignore_no_formats_error'):
1151c407 2539 raise ExtractorError('Requested format is not available', expected=True,
2540 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2541 else:
2542 self.report_warning('Requested format is not available')
4513a41a
A
2543 # Process what we can, even without any available formats.
2544 self.process_info(dict(info_dict))
b7da73eb 2545 elif download:
2546 self.to_screen(
07cce701 2547 '[info] %s: Downloading %d format(s): %s' % (
2548 info_dict['id'], len(formats_to_download),
2549 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2550 for fmt in formats_to_download:
dd82ffea 2551 new_info = dict(info_dict)
4ec82a72 2552 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2553 new_info['__original_infodict'] = info_dict
b7da73eb 2554 new_info.update(fmt)
dd82ffea 2555 self.process_info(new_info)
49a57e70 2556 # We update the info dict with the selected best quality format (backwards compatibility)
b7da73eb 2557 if formats_to_download:
2558 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2559 return info_dict
2560
98c70d6f 2561 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2562 """Select the requested subtitles and their format"""
98c70d6f
JMF
2563 available_subs = {}
2564 if normal_subtitles and self.params.get('writesubtitles'):
2565 available_subs.update(normal_subtitles)
2566 if automatic_captions and self.params.get('writeautomaticsub'):
2567 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2568 if lang not in available_subs:
2569 available_subs[lang] = cap_info
2570
4d171848
JMF
2571 if (not self.params.get('writesubtitles') and not
2572 self.params.get('writeautomaticsub') or not
2573 available_subs):
2574 return None
a504ced0 2575
c32b0aab 2576 all_sub_langs = available_subs.keys()
a504ced0 2577 if self.params.get('allsubtitles', False):
c32b0aab 2578 requested_langs = all_sub_langs
2579 elif self.params.get('subtitleslangs', False):
77c4a9ef 2580 # A list is used so that the order of languages will be the same as
2581 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2582 requested_langs = []
2583 for lang_re in self.params.get('subtitleslangs'):
2584 if lang_re == 'all':
2585 requested_langs.extend(all_sub_langs)
c32b0aab 2586 continue
77c4a9ef 2587 discard = lang_re[0] == '-'
c32b0aab 2588 if discard:
77c4a9ef 2589 lang_re = lang_re[1:]
2590 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2591 if discard:
2592 for lang in current_langs:
77c4a9ef 2593 while lang in requested_langs:
2594 requested_langs.remove(lang)
c32b0aab 2595 else:
77c4a9ef 2596 requested_langs.extend(current_langs)
2597 requested_langs = orderedSet(requested_langs)
c32b0aab 2598 elif 'en' in available_subs:
2599 requested_langs = ['en']
a504ced0 2600 else:
c32b0aab 2601 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2602 if requested_langs:
2603 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
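# Worked example (hypothetical data): with subtitleslangs ['en.*', '-en-GB'] and
# available languages ['en-US', 'en-GB', 'fr'], the 'en.*' pattern first adds
# ['en-US', 'en-GB'], the '-en-GB' entry then removes 'en-GB',
# leaving requested_langs == ['en-US'].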
a504ced0
JMF
2604
2605 formats_query = self.params.get('subtitlesformat', 'best')
2606 formats_preference = formats_query.split('/') if formats_query else []
2607 subs = {}
2608 for lang in requested_langs:
2609 formats = available_subs.get(lang)
2610 if formats is None:
2611 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2612 continue
a504ced0
JMF
2613 for ext in formats_preference:
2614 if ext == 'best':
2615 f = formats[-1]
2616 break
2617 matches = list(filter(lambda f: f['ext'] == ext, formats))
2618 if matches:
2619 f = matches[-1]
2620 break
2621 else:
2622 f = formats[-1]
2623 self.report_warning(
2624 'No subtitle format found matching "%s" for language %s, '
2625 'using %s' % (formats_query, lang, f['ext']))
2626 subs[lang] = f
2627 return subs
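# For example, with subtitlesformat 'vtt/srt/best' a language offering ['srt', 'vtt']
# resolves to its 'vtt' variant; if none of the listed extensions match (and 'best'
# is not given), the last available format is used and a warning is printed.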
2628
d06daf23 2629 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2630 def print_mandatory(field, actual_field=None):
2631 if actual_field is None:
2632 actual_field = field
d06daf23 2633 if (self.params.get('force%s' % field, False)
53c18592 2634 and (not incomplete or info_dict.get(actual_field) is not None)):
2635 self.to_stdout(info_dict[actual_field])
d06daf23
S
2636
2637 def print_optional(field):
2638 if (self.params.get('force%s' % field, False)
2639 and info_dict.get(field) is not None):
2640 self.to_stdout(info_dict[field])
2641
53c18592 2642 info_dict = info_dict.copy()
2643 if filename is not None:
2644 info_dict['filename'] = filename
2645 if info_dict.get('requested_formats') is not None:
2646 # For RTMP URLs, also include the playpath
2647 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2648 elif 'url' in info_dict:
2649 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2650
2b8a2973 2651 if self.params.get('forceprint') or self.params.get('forcejson'):
2652 self.post_extract(info_dict)
53c18592 2653 for tmpl in self.params.get('forceprint', []):
b5ae35ee 2654 mobj = re.match(r'\w+(=?)$', tmpl)
2655 if mobj and mobj.group(1):
2656 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2657 elif mobj:
2658 tmpl = '%({})s'.format(tmpl)
2659 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
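# e.g. a forceprint entry 'title' is expanded to the template '%(title)s', while
# 'title=' is expanded to 'title = %(title)s' (field name followed by its value).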
53c18592 2660
d06daf23
S
2661 print_mandatory('title')
2662 print_mandatory('id')
53c18592 2663 print_mandatory('url', 'urls')
d06daf23
S
2664 print_optional('thumbnail')
2665 print_optional('description')
53c18592 2666 print_optional('filename')
b868936c 2667 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2668 self.to_stdout(formatSeconds(info_dict['duration']))
2669 print_mandatory('format')
53c18592 2670
2b8a2973 2671 if self.params.get('forcejson'):
6e84b215 2672 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2673
e8e73840 2674 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2675 if not info.get('url'):
1151c407 2676 self.raise_no_formats(info, True)
e8e73840 2677
2678 if test:
2679 verbose = self.params.get('verbose')
2680 params = {
2681 'test': True,
a169858f 2682 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2683 'verbose': verbose,
2684 'noprogress': not verbose,
2685 'nopart': True,
2686 'skip_unavailable_fragments': False,
2687 'keep_fragments': False,
2688 'overwrites': True,
2689 '_no_ytdl_file': True,
2690 }
2691 else:
2692 params = self.params
96fccc10 2693 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2694 if not test:
2695 for ph in self._progress_hooks:
2696 fd.add_progress_hook(ph)
18e674b4 2697 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2698 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2699
adbc4ec4
THD
 2700 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
 2701 # but it may contain objects that are not deep-copyable
2702 new_info = self._copy_infodict(info)
e8e73840 2703 if new_info.get('http_headers') is None:
2704 new_info['http_headers'] = self._calc_headers(new_info)
2705 return fd.download(name, new_info, subtitle)
2706
8222d8de
JMF
2707 def process_info(self, info_dict):
2708 """Process a single resolved IE result."""
2709
2710 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
2711
2712 max_downloads = self.params.get('max_downloads')
2713 if max_downloads is not None:
2714 if self._num_downloads >= int(max_downloads):
2715 raise MaxDownloadsReached()
8222d8de 2716
d06daf23 2717 # TODO: backward compatibility, to be removed
8222d8de 2718 info_dict['fulltitle'] = info_dict['title']
8222d8de 2719
4513a41a 2720 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2721 info_dict['format'] = info_dict['ext']
2722
c77495e3 2723 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2724 return
2725
277d6ff5 2726 self.post_extract(info_dict)
fd288278 2727 self._num_downloads += 1
8222d8de 2728
dcf64d43 2729 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2730 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2731 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2732 files_to_move = {}
8222d8de
JMF
2733
2734 # Forced printings
4513a41a 2735 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2736
b7b04c78 2737 if self.params.get('simulate'):
2d30509f 2738 if self.params.get('force_write_download_archive', False):
2739 self.record_download_archive(info_dict)
2d30509f 2740 # Do nothing else if in simulate mode
8222d8de
JMF
2741 return
2742
de6000d9 2743 if full_filename is None:
8222d8de 2744 return
e92caff5 2745 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2746 return
e92caff5 2747 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2748 return
2749
80c03fa9 2750 if self._write_description('video', info_dict,
2751 self.prepare_filename(info_dict, 'description')) is None:
2752 return
2753
2754 sub_files = self._write_subtitles(info_dict, temp_filename)
2755 if sub_files is None:
2756 return
2757 files_to_move.update(dict(sub_files))
2758
2759 thumb_files = self._write_thumbnails(
2760 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2761 if thumb_files is None:
2762 return
2763 files_to_move.update(dict(thumb_files))
8222d8de 2764
80c03fa9 2765 infofn = self.prepare_filename(info_dict, 'infojson')
2766 _infojson_written = self._write_info_json('video', info_dict, infofn)
2767 if _infojson_written:
dac5df5a 2768 info_dict['infojson_filename'] = infofn
e75bb0d6 2769 # For backward compatibility, even though it was a private field
80c03fa9 2770 info_dict['__infojson_filename'] = infofn
2771 elif _infojson_written is None:
2772 return
2773
2774 # Note: Annotations are deprecated
2775 annofn = None
1fb07d10 2776 if self.params.get('writeannotations', False):
de6000d9 2777 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2778 if annofn:
e92caff5 2779 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2780 return
0c3d0f51 2781 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2782 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2783 elif not info_dict.get('annotations'):
2784 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2785 else:
2786 try:
6febd1c1 2787 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2788 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2789 annofile.write(info_dict['annotations'])
2790 except (KeyError, TypeError):
6febd1c1 2791 self.report_warning('There are no annotations to write.')
7b6fefc9 2792 except (OSError, IOError):
6febd1c1 2793 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2794 return
1fb07d10 2795
732044af 2796 # Write internet shortcut files
08438d2c 2797 def _write_link_file(link_type):
732044af 2798 if 'webpage_url' not in info_dict:
2799 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
08438d2c 2800 return False
2801 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2802 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2803 return False
10e3742e 2804 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2805 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2806 return True
2807 try:
2808 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2809 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2810 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2811 template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2812 if link_type == 'desktop':
2813 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2814 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2815 except (OSError, IOError):
2816 self.report_error(f'Cannot write internet shortcut {linkfn}')
2817 return False
732044af 2818 return True
2819
08438d2c 2820 write_links = {
2821 'url': self.params.get('writeurllink'),
2822 'webloc': self.params.get('writewebloclink'),
2823 'desktop': self.params.get('writedesktoplink'),
2824 }
2825 if self.params.get('writelink'):
2826 link_type = ('webloc' if sys.platform == 'darwin'
2827 else 'desktop' if sys.platform.startswith('linux')
2828 else 'url')
2829 write_links[link_type] = True
2830
2831 if any(should_write and not _write_link_file(link_type)
2832 for link_type, should_write in write_links.items()):
2833 return
732044af 2834
56d868db 2835 try:
2836 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2837 except PostProcessingError as err:
2838 self.report_error('Preprocessing: %s' % str(err))
2839 return
2840
732044af 2841 must_record_download_archive = False
56d868db 2842 if self.params.get('skip_download', False):
2843 info_dict['filepath'] = temp_filename
2844 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2845 info_dict['__files_to_move'] = files_to_move
2846 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2847 else:
2848 # Download
b868936c 2849 info_dict.setdefault('__postprocessors', [])
4340deca 2850 try:
0202b52a 2851
6b591b29 2852 def existing_file(*filepaths):
2853 ext = info_dict.get('ext')
2854 final_ext = self.params.get('final_ext', ext)
2855 existing_files = []
2856 for file in orderedSet(filepaths):
2857 if final_ext != ext:
2858 converted = replace_extension(file, final_ext, ext)
2859 if os.path.exists(encodeFilename(converted)):
2860 existing_files.append(converted)
2861 if os.path.exists(encodeFilename(file)):
2862 existing_files.append(file)
2863
2864 if not existing_files or self.params.get('overwrites', False):
2865 for file in orderedSet(existing_files):
2866 self.report_file_delete(file)
2867 os.remove(encodeFilename(file))
2868 return None
2869
6b591b29 2870 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2871 return existing_files[0]
0202b52a 2872
2873 success = True
4340deca 2874 if info_dict.get('requested_formats') is not None:
81cd954a
S
2875
2876 def compatible_formats(formats):
d03cfdce 2877 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2878 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2879 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2880 if len(video_formats) > 2 or len(audio_formats) > 2:
2881 return False
2882
81cd954a 2883 # Check extension
d03cfdce 2884 exts = set(format.get('ext') for format in formats)
2885 COMPATIBLE_EXTS = (
2886 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2887 set(('webm',)),
2888 )
2889 for ext_sets in COMPATIBLE_EXTS:
2890 if ext_sets.issuperset(exts):
2891 return True
81cd954a
S
2892 # TODO: Check acodec/vcodec
2893 return False
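# e.g. an mp4 video-only format plus an m4a audio-only format is considered
# compatible (both extensions are in the first set), whereas webm video with
# m4a audio is not and will be remuxed into mkv below.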
2894
2895 requested_formats = info_dict['requested_formats']
0202b52a 2896 old_ext = info_dict['ext']
4e3b637d 2897 if self.params.get('merge_output_format') is None:
2898 if not compatible_formats(requested_formats):
2899 info_dict['ext'] = 'mkv'
2900 self.report_warning(
2901 'Requested formats are incompatible for merge and will be merged into mkv')
2902 if (info_dict['ext'] == 'webm'
2903 and info_dict.get('thumbnails')
2904 # check with type instead of pp_key, __name__, or isinstance
 2905 # since we don't want any custom PPs to trigger this
2906 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2907 info_dict['ext'] = 'mkv'
2908 self.report_warning(
2909 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 2910 new_ext = info_dict['ext']
0202b52a 2911
124bc071 2912 def correct_ext(filename, ext=new_ext):
96fccc10 2913 if filename == '-':
2914 return filename
0202b52a 2915 filename_real_ext = os.path.splitext(filename)[1][1:]
2916 filename_wo_ext = (
2917 os.path.splitext(filename)[0]
124bc071 2918 if filename_real_ext in (old_ext, new_ext)
0202b52a 2919 else filename)
124bc071 2920 return '%s.%s' % (filename_wo_ext, ext)
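# e.g. with old_ext 'webm' and new_ext 'mkv', correct_ext('video.webm') gives
# 'video.mkv' (hypothetical filename), while '-' (stdout) is returned unchanged.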
0202b52a 2921
38c6902b 2922 # Ensure filename always has a correct extension for successful merge
0202b52a 2923 full_filename = correct_ext(full_filename)
2924 temp_filename = correct_ext(temp_filename)
2925 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2926 info_dict['__real_download'] = False
18e674b4 2927
adbc4ec4
THD
2928 downloaded = []
2929 merger = FFmpegMergerPP(self)
2930
2931 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 2932 if dl_filename is not None:
6c7274ec 2933 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
2934 elif fd:
2935 for f in requested_formats if fd != FFmpegFD else []:
2936 f['filepath'] = fname = prepend_extension(
2937 correct_ext(temp_filename, info_dict['ext']),
2938 'f%s' % f['format_id'], info_dict['ext'])
2939 downloaded.append(fname)
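# e.g. with temp_filename 'Title.mp4' and format_id '137' (hypothetical values),
# the intermediate file becomes 'Title.f137.mp4' and is merged afterwards.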
dbf5416a 2940 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2941 success, real_download = self.dl(temp_filename, info_dict)
2942 info_dict['__real_download'] = real_download
18e674b4 2943 else:
18e674b4 2944 if self.params.get('allow_unplayable_formats'):
2945 self.report_warning(
2946 'You have requested merging of multiple formats '
2947 'while also allowing unplayable formats to be downloaded. '
2948 'The formats won\'t be merged to prevent data corruption.')
2949 elif not merger.available:
2950 self.report_warning(
2951 'You have requested merging of multiple formats but ffmpeg is not installed. '
2952 'The formats won\'t be merged.')
2953
96fccc10 2954 if temp_filename == '-':
adbc4ec4 2955 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 2956 else 'but the formats are incompatible for simultaneous download' if merger.available
2957 else 'but ffmpeg is not installed')
2958 self.report_warning(
2959 f'You have requested downloading multiple formats to stdout {reason}. '
2960 'The formats will be streamed one after the other')
2961 fname = temp_filename
dbf5416a 2962 for f in requested_formats:
2963 new_info = dict(info_dict)
2964 del new_info['requested_formats']
2965 new_info.update(f)
96fccc10 2966 if temp_filename != '-':
124bc071 2967 fname = prepend_extension(
2968 correct_ext(temp_filename, new_info['ext']),
2969 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2970 if not self._ensure_dir_exists(fname):
2971 return
a21e0ab1 2972 f['filepath'] = fname
96fccc10 2973 downloaded.append(fname)
dbf5416a 2974 partial_success, real_download = self.dl(fname, new_info)
2975 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2976 success = success and partial_success
adbc4ec4
THD
2977
2978 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
2979 info_dict['__postprocessors'].append(merger)
2980 info_dict['__files_to_merge'] = downloaded
 2981 # Even if there were no new downloads, the files are only merged now
2982 info_dict['__real_download'] = True
2983 else:
2984 for file in downloaded:
2985 files_to_move[file] = None
4340deca
P
2986 else:
2987 # Just a single file
0202b52a 2988 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2989 if dl_filename is None or dl_filename == temp_filename:
2990 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2991 # So we should try to resume the download
e8e73840 2992 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2993 info_dict['__real_download'] = real_download
6c7274ec 2994 else:
2995 self.report_file_already_downloaded(dl_filename)
0202b52a 2996
0202b52a 2997 dl_filename = dl_filename or temp_filename
c571435f 2998 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2999
3158150c 3000 except network_exceptions as err:
7960b056 3001 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
3002 return
3003 except (OSError, IOError) as err:
3004 raise UnavailableVideoError(err)
3005 except (ContentTooShortError, ) as err:
3006 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3007 return
8222d8de 3008
de6000d9 3009 if success and full_filename != '-':
f17f8651 3010
fd7cfb64 3011 def fixup():
3012 do_fixup = True
3013 fixup_policy = self.params.get('fixup')
3014 vid = info_dict['id']
3015
3016 if fixup_policy in ('ignore', 'never'):
3017 return
3018 elif fixup_policy == 'warn':
3019 do_fixup = False
f89b3e2d 3020 elif fixup_policy != 'force':
3021 assert fixup_policy in ('detect_or_warn', None)
3022 if not info_dict.get('__real_download'):
3023 do_fixup = False
fd7cfb64 3024
3025 def ffmpeg_fixup(cndn, msg, cls):
3026 if not cndn:
3027 return
3028 if not do_fixup:
3029 self.report_warning(f'{vid}: {msg}')
3030 return
3031 pp = cls(self)
3032 if pp.available:
3033 info_dict['__postprocessors'].append(pp)
3034 else:
3035 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
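# i.e. when the condition holds: with fixups enabled, the given FFmpegFixup*
# postprocessor is queued (provided ffmpeg is available); otherwise only the
# warning message is emitted.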
3036
3037 stretched_ratio = info_dict.get('stretched_ratio')
3038 ffmpeg_fixup(
3039 stretched_ratio not in (1, None),
3040 f'Non-uniform pixel ratio {stretched_ratio}',
3041 FFmpegFixupStretchedPP)
3042
3043 ffmpeg_fixup(
3044 (info_dict.get('requested_formats') is None
3045 and info_dict.get('container') == 'm4a_dash'
3046 and info_dict.get('ext') == 'm4a'),
3047 'writing DASH m4a. Only some players support this container',
3048 FFmpegFixupM4aPP)
3049
993191c0 3050 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3051 downloader = downloader.__name__ if downloader else None
adbc4ec4
THD
3052
3053 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3054 ffmpeg_fixup(downloader == 'HlsFD',
3055 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3056 FFmpegFixupM3u8PP)
3057 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3058 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3059
e04b003e 3060 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3061 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3062
3063 fixup()
8222d8de 3064 try:
23c1a667 3065 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 3066 except PostProcessingError as err:
3067 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3068 return
ab8e5e51
AM
3069 try:
3070 for ph in self._post_hooks:
23c1a667 3071 ph(info_dict['filepath'])
ab8e5e51
AM
3072 except Exception as err:
3073 self.report_error('post hooks: %s' % str(err))
3074 return
2d30509f 3075 must_record_download_archive = True
3076
3077 if must_record_download_archive or self.params.get('force_write_download_archive', False):
3078 self.record_download_archive(info_dict)
c3e6ffba 3079 max_downloads = self.params.get('max_downloads')
3080 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3081 raise MaxDownloadsReached()
8222d8de 3082
aa9369a2 3083 def __download_wrapper(self, func):
3084 @functools.wraps(func)
3085 def wrapper(*args, **kwargs):
3086 try:
3087 res = func(*args, **kwargs)
3088 except UnavailableVideoError as e:
3089 self.report_error(e)
b222c271 3090 except MaxDownloadsReached as e:
aa9369a2 3091 self.to_screen(f'[info] {e}')
3092 raise
b222c271 3093 except DownloadCancelled as e:
3094 self.to_screen(f'[info] {e}')
3095 if not self.params.get('break_per_url'):
3096 raise
aa9369a2 3097 else:
3098 if self.params.get('dump_single_json', False):
3099 self.post_extract(res)
3100 self.to_stdout(json.dumps(self.sanitize_info(res)))
3101 return wrapper
3102
8222d8de
JMF
3103 def download(self, url_list):
3104 """Download a given list of URLs."""
aa9369a2 3105 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3106 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3107 if (len(url_list) > 1
3108 and outtmpl != '-'
3109 and '%' not in outtmpl
3110 and self.params.get('max_downloads') != 1):
acd69589 3111 raise SameFileError(outtmpl)
8222d8de
JMF
3112
3113 for url in url_list:
aa9369a2 3114 self.__download_wrapper(self.extract_info)(
3115 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3116
3117 return self._download_retcode
3118
1dcc4c0c 3119 def download_with_info_file(self, info_filename):
31bd3925
JMF
3120 with contextlib.closing(fileinput.FileInput(
3121 [info_filename], mode='r',
3122 openhook=fileinput.hook_encoded('utf-8'))) as f:
 3123 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3124 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3125 try:
aa9369a2 3126 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3127 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3128 if not isinstance(e, EntryNotInPlaylist):
3129 self.to_stderr('\r')
d4943898
JMF
3130 webpage_url = info.get('webpage_url')
3131 if webpage_url is not None:
aa9369a2 3132 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3133 return self.download([webpage_url])
3134 else:
3135 raise
3136 return self._download_retcode
1dcc4c0c 3137
cb202fd2 3138 @staticmethod
8012d892 3139 def sanitize_info(info_dict, remove_private_keys=False):
3140 ''' Sanitize the infodict for converting to json '''
3ad56b42 3141 if info_dict is None:
3142 return info_dict
6e84b215 3143 info_dict.setdefault('epoch', int(time.time()))
3144 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
a49891c7 3145 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
8012d892 3146 if remove_private_keys:
6e84b215 3147 remove_keys |= {
dac5df5a 3148 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3149 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3150 }
ae8f99e6 3151 reject = lambda k, v: k not in keep_keys and (
ceb98323 3152 k.startswith('_') or k in remove_keys or v is None)
ae8f99e6 3153 else:
ae8f99e6 3154 reject = lambda k, v: k in remove_keys
adbc4ec4
THD
3155
3156 def filter_fn(obj):
3157 if isinstance(obj, dict):
3158 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3159 elif isinstance(obj, (list, tuple, set, LazyList)):
3160 return list(map(filter_fn, obj))
3161 elif obj is None or isinstance(obj, (str, int, float, bool)):
3162 return obj
3163 else:
3164 return repr(obj)
3165
5226731e 3166 return filter_fn(info_dict)
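# Roughly (hypothetical values): {'id': 'x', '_private': 1, 'obj': object(), 'foo': None}
# with remove_private_keys=True becomes {'id': 'x', 'epoch': ..., 'obj': '<object object at 0x...>'}:
# underscore-prefixed keys (except keep_keys), the listed private keys and None values
# are dropped, and values that are not JSON-friendly are replaced by their repr().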
cb202fd2 3167
8012d892 3168 @staticmethod
3169 def filter_requested_info(info_dict, actually_filter=True):
3170 ''' Alias of sanitize_info for backward compatibility '''
3171 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3172
dcf64d43 3173 def run_pp(self, pp, infodict):
5bfa4862 3174 files_to_delete = []
dcf64d43 3175 if '__files_to_move' not in infodict:
3176 infodict['__files_to_move'] = {}
b1940459 3177 try:
3178 files_to_delete, infodict = pp.run(infodict)
3179 except PostProcessingError as e:
 3180 # ignoreerrors must be literally True (not 'only_download') to suppress the error
3181 if self.params.get('ignoreerrors') is True:
3182 self.report_error(e)
3183 return infodict
3184 raise
3185
5bfa4862 3186 if not files_to_delete:
dcf64d43 3187 return infodict
5bfa4862 3188 if self.params.get('keepvideo', False):
3189 for f in files_to_delete:
dcf64d43 3190 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3191 else:
3192 for old_filename in set(files_to_delete):
3193 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3194 try:
3195 os.remove(encodeFilename(old_filename))
3196 except (IOError, OSError):
3197 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3198 if old_filename in infodict['__files_to_move']:
3199 del infodict['__files_to_move'][old_filename]
3200 return infodict
5bfa4862 3201
277d6ff5 3202 @staticmethod
3203 def post_extract(info_dict):
3204 def actual_post_extract(info_dict):
3205 if info_dict.get('_type') in ('playlist', 'multi_video'):
3206 for video_dict in info_dict.get('entries', {}):
b050d210 3207 actual_post_extract(video_dict or {})
277d6ff5 3208 return
3209
07cce701 3210 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 3211 extra = post_extractor().items()
3212 info_dict.update(extra)
07cce701 3213 info_dict.pop('__post_extractor', None)
277d6ff5 3214
4ec82a72 3215 original_infodict = info_dict.get('__original_infodict') or {}
3216 original_infodict.update(extra)
3217 original_infodict.pop('__post_extractor', None)
3218
b050d210 3219 actual_post_extract(info_dict or {})
277d6ff5 3220
56d868db 3221 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3222 info = dict(ie_info)
56d868db 3223 info['__files_to_move'] = files_to_move or {}
3224 for pp in self._pps[key]:
dcf64d43 3225 info = self.run_pp(pp, info)
56d868db 3226 return info, info.pop('__files_to_move', None)
5bfa4862 3227
dcf64d43 3228 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de
JMF
3229 """Run all the postprocessors on the given file."""
3230 info = dict(ie_info)
3231 info['filepath'] = filename
dcf64d43 3232 info['__files_to_move'] = files_to_move or {}
0202b52a 3233
56d868db 3234 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3235 info = self.run_pp(pp, info)
3236 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3237 del info['__files_to_move']
56d868db 3238 for pp in self._pps['after_move']:
dcf64d43 3239 info = self.run_pp(pp, info)
23c1a667 3240 return info
c1c9a79c 3241
5db07df6 3242 def _make_archive_id(self, info_dict):
e9fef7ee
S
3243 video_id = info_dict.get('id')
3244 if not video_id:
3245 return
5db07df6
PH
3246 # Future-proof against any change in case
3247 # and backwards compatibility with prior versions
e9fef7ee 3248 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3249 if extractor is None:
1211bb6d
S
3250 url = str_or_none(info_dict.get('url'))
3251 if not url:
3252 return
e9fef7ee 3253 # Try to find a matching extractor for the URL and take its ie_key
8b7491c8 3254 for ie_key, ie in self._ies.items():
1211bb6d 3255 if ie.suitable(url):
8b7491c8 3256 extractor = ie_key
e9fef7ee
S
3257 break
3258 else:
3259 return
d0757229 3260 return '%s %s' % (extractor.lower(), video_id)
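# e.g. an entry from the Youtube extractor with id 'dQw4w9WgXcQ' yields the archive
# id 'youtube dQw4w9WgXcQ' (lowercased extractor key, a space, then the video id).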
5db07df6
PH
3261
3262 def in_download_archive(self, info_dict):
3263 fn = self.params.get('download_archive')
3264 if fn is None:
3265 return False
3266
3267 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3268 if not vid_id:
7012b23c 3269 return False # Incomplete video information
5db07df6 3270
a45e8619 3271 return vid_id in self.archive
c1c9a79c
PH
3272
3273 def record_download_archive(self, info_dict):
3274 fn = self.params.get('download_archive')
3275 if fn is None:
3276 return
5db07df6
PH
3277 vid_id = self._make_archive_id(info_dict)
3278 assert vid_id
c1c9a79c 3279 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3280 archive_file.write(vid_id + '\n')
a45e8619 3281 self.archive.add(vid_id)
dd82ffea 3282
8c51aa65 3283 @staticmethod
8abeeb94 3284 def format_resolution(format, default='unknown'):
9359f3d4 3285 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3286 return 'audio only'
f49d89ee
PH
3287 if format.get('resolution') is not None:
3288 return format['resolution']
35615307 3289 if format.get('width') and format.get('height'):
ff51ed58 3290 return '%dx%d' % (format['width'], format['height'])
35615307 3291 elif format.get('height'):
ff51ed58 3292 return '%sp' % format['height']
35615307 3293 elif format.get('width'):
ff51ed58 3294 return '%dx?' % format['width']
3295 return default
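# e.g. {'width': 1920, 'height': 1080} -> '1920x1080', {'height': 720} -> '720p',
# and a format with vcodec 'none' but an audio codec -> 'audio only'.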
8c51aa65 3296
c57f7757
PH
3297 def _format_note(self, fdict):
3298 res = ''
3299 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3300 res += '(unsupported)'
32f90364
PH
3301 if fdict.get('language'):
3302 if res:
3303 res += ' '
f304da8a 3304 res += '[%s]' % fdict['language']
c57f7757 3305 if fdict.get('format_note') is not None:
f304da8a 3306 if res:
3307 res += ' '
3308 res += fdict['format_note']
c57f7757 3309 if fdict.get('tbr') is not None:
f304da8a 3310 if res:
3311 res += ', '
3312 res += '%4dk' % fdict['tbr']
c57f7757
PH
3313 if fdict.get('container') is not None:
3314 if res:
3315 res += ', '
3316 res += '%s container' % fdict['container']
3089bc74
S
3317 if (fdict.get('vcodec') is not None
3318 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3319 if res:
3320 res += ', '
3321 res += fdict['vcodec']
91c7271a 3322 if fdict.get('vbr') is not None:
c57f7757
PH
3323 res += '@'
3324 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3325 res += 'video@'
3326 if fdict.get('vbr') is not None:
3327 res += '%4dk' % fdict['vbr']
fbb21cf5 3328 if fdict.get('fps') is not None:
5d583bdf
S
3329 if res:
3330 res += ', '
3331 res += '%sfps' % fdict['fps']
c57f7757
PH
3332 if fdict.get('acodec') is not None:
3333 if res:
3334 res += ', '
3335 if fdict['acodec'] == 'none':
3336 res += 'video only'
3337 else:
3338 res += '%-5s' % fdict['acodec']
3339 elif fdict.get('abr') is not None:
3340 if res:
3341 res += ', '
3342 res += 'audio'
3343 if fdict.get('abr') is not None:
3344 res += '@%3dk' % fdict['abr']
3345 if fdict.get('asr') is not None:
3346 res += ' (%5dHz)' % fdict['asr']
3347 if fdict.get('filesize') is not None:
3348 if res:
3349 res += ', '
3350 res += format_bytes(fdict['filesize'])
9732d77e
PH
3351 elif fdict.get('filesize_approx') is not None:
3352 if res:
3353 res += ', '
3354 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3355 return res
91c7271a 3356
ec11a9f4 3357 def _list_format_headers(self, *headers):
3358 if self.params.get('listformats_table', True) is not False:
3359 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3360 return headers
3361
c57f7757 3362 def list_formats(self, info_dict):
b69fd25c 3363 if not info_dict.get('formats') and not info_dict.get('url'):
3364 self.to_screen('%s has no formats' % info_dict['id'])
3365 return
3366 self.to_screen('[info] Available formats for %s:' % info_dict['id'])
3367
94badb25 3368 formats = info_dict.get('formats', [info_dict])
ec11a9f4 3369 new_format = self.params.get('listformats_table', True) is not False
76d321f6 3370 if new_format:
ec11a9f4 3371 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
76d321f6 3372 table = [
3373 [
ec11a9f4 3374 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
76d321f6 3375 format_field(f, 'ext'),
ff51ed58 3376 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
c5e3f849 3377 format_field(f, 'fps', '\t%d'),
176f1866 3378 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
ec11a9f4 3379 delim,
c5e3f849 3380 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3381 format_field(f, 'tbr', '\t%dk'),
b69fd25c 3382 shorten_protocol_name(f.get('protocol', '')),
ec11a9f4 3383 delim,
ff51ed58 3384 format_field(f, 'vcodec', default='unknown').replace(
3385 'none',
3386 'images' if f.get('acodec') == 'none'
3387 else self._format_screen('audio only', self.Styles.SUPPRESS)),
c5e3f849 3388 format_field(f, 'vbr', '\t%dk'),
ff51ed58 3389 format_field(f, 'acodec', default='unknown').replace(
3390 'none',
3391 '' if f.get('vcodec') == 'none'
3392 else self._format_screen('video only', self.Styles.SUPPRESS)),
c5e3f849 3393 format_field(f, 'abr', '\t%dk'),
3394 format_field(f, 'asr', '\t%dHz'),
34921b43 3395 join_nonempty(
3396 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3f698246 3397 format_field(f, 'language', '[%s]'),
c5e3f849 3398 join_nonempty(
3399 format_field(f, 'format_note'),
3400 format_field(f, 'container', ignore=(None, f.get('ext'))),
3401 delim=', '),
3402 delim=' '),
3f698246 3403 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
ec11a9f4 3404 header_line = self._list_format_headers(
c5e3f849 3405 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3406 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
76d321f6 3407 else:
3408 table = [
3409 [
3410 format_field(f, 'format_id'),
3411 format_field(f, 'ext'),
3412 self.format_resolution(f),
3413 self._format_note(f)]
3414 for f in formats
3415 if f.get('preference') is None or f['preference'] >= -1000]
3416 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3417
169dbde9 3418 self.to_stdout(render_table(
ec11a9f4 3419 header_line, table,
c5e3f849 3420 extra_gap=(0 if new_format else 1),
3421 hide_empty=new_format,
ec11a9f4 3422 delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
cfb56d1a
PH
3423
3424 def list_thumbnails(self, info_dict):
b0249bca 3425 thumbnails = list(info_dict.get('thumbnails'))
cfb56d1a 3426 if not thumbnails:
b7b72db9 3427 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3428 return
cfb56d1a
PH
3429
3430 self.to_screen(
3431 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3432 self.to_stdout(render_table(
ec11a9f4 3433 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
cfb56d1a 3434 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3435
360e1ca5 3436 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3437 if not subtitles:
360e1ca5 3438 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3439 return
a504ced0 3440 self.to_screen(
edab9dbf 3441 'Available %s for %s:' % (name, video_id))
2412044c 3442
3443 def _row(lang, formats):
49c258e1 3444 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3445 if len(set(names)) == 1:
7aee40c1 3446 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3447 return [lang, ', '.join(names), ', '.join(exts)]
3448
169dbde9 3449 self.to_stdout(render_table(
ec11a9f4 3450 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3451 [_row(lang, formats) for lang, formats in subtitles.items()],
c5e3f849 3452 hide_empty=True))
a504ced0 3453
dca08720
PH
3454 def urlopen(self, req):
3455 """ Start an HTTP download """
82d8a8b6 3456 if isinstance(req, compat_basestring):
67dda517 3457 req = sanitized_Request(req)
19a41fc6 3458 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3459
3460 def print_debug_header(self):
3461 if not self.params.get('verbose'):
3462 return
49a57e70 3463
3464 def get_encoding(stream):
3465 ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3466 if not supports_terminal_sequences(stream):
e3c7d495 3467 from .compat import WINDOWS_VT_MODE
3468 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3469 return ret
3470
3471 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3472 locale.getpreferredencoding(),
3473 sys.getfilesystemencoding(),
3474 get_encoding(self._screen_file), get_encoding(self._err_file),
3475 self.get_encoding())
883d4b1e 3476
3477 logger = self.params.get('logger')
3478 if logger:
3479 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3480 write_debug(encoding_str)
3481 else:
96565c7e 3482 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3483 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3484
4c88ff87 3485 source = detect_variant()
36eaf303 3486 write_debug(join_nonempty(
3487 'yt-dlp version', __version__,
3488 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3489 '' if source == 'unknown' else f'({source})',
3490 delim=' '))
6e21fdd2 3491 if not _LAZY_LOADER:
3492 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3493 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3494 else:
49a57e70 3495 write_debug('Lazy loading extractors is disabled')
3ae5e797 3496 if plugin_extractors or plugin_postprocessors:
49a57e70 3497 write_debug('Plugins: %s' % [
3ae5e797 3498 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3499 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3500 if self.params.get('compat_opts'):
49a57e70 3501 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3502
3503 if source == 'source':
dca08720 3504 try:
36eaf303 3505 sp = Popen(
3506 ['git', 'rev-parse', '--short', 'HEAD'],
3507 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3508 cwd=os.path.dirname(os.path.abspath(__file__)))
3509 out, err = sp.communicate_or_kill()
3510 out = out.decode().strip()
3511 if re.match('[0-9a-f]+', out):
3512 write_debug('Git HEAD: %s' % out)
70a1165b 3513 except Exception:
36eaf303 3514 try:
3515 sys.exc_clear()
3516 except Exception:
3517 pass
b300cda4
S
3518
3519 def python_implementation():
3520 impl_name = platform.python_implementation()
3521 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3522 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3523 return impl_name
3524
49a57e70 3525 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3526 platform.python_version(),
3527 python_implementation(),
3528 platform.architecture()[0],
b300cda4 3529 platform_name()))
d28b5171 3530
8913ef74 3531 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3532 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3533 if ffmpeg_features:
a4211baf 3534 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
8913ef74 3535
4c83c967 3536 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3537 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3538 exe_str = ', '.join(
2831b468 3539 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3540 ) or 'none'
49a57e70 3541 write_debug('exe versions: %s' % exe_str)
dca08720 3542
2831b468 3543 from .downloader.websocket import has_websockets
3544 from .postprocessor.embedthumbnail import has_mutagen
f59f5ef8 3545 from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
2831b468 3546
c586f9e8 3547 lib_str = join_nonempty(
edf65256 3548 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
f59f5ef8 3549 SECRETSTORAGE_AVAILABLE and 'secretstorage',
2831b468 3550 has_mutagen and 'mutagen',
3551 SQLITE_AVAILABLE and 'sqlite',
c586f9e8 3552 has_websockets and 'websockets',
3553 delim=', ') or 'none'
49a57e70 3554 write_debug('Optional libraries: %s' % lib_str)
2831b468 3555
dca08720
PH
3556 proxy_map = {}
3557 for handler in self._opener.handlers:
3558 if hasattr(handler, 'proxies'):
3559 proxy_map.update(handler.proxies)
49a57e70 3560 write_debug(f'Proxy map: {proxy_map}')
dca08720 3561
49a57e70 3562 # Not implemented
3563 if False and self.params.get('call_home'):
58b1f00d 3564 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3565 write_debug('Public IP address: %s' % ipaddr)
58b1f00d
PH
3566 latest_version = self.urlopen(
3567 'https://yt-dl.org/latest/version').read().decode('utf-8')
3568 if version_tuple(latest_version) > version_tuple(__version__):
3569 self.report_warning(
3570 'You are using an outdated version (newest version: %s)! '
3571 'See https://yt-dl.org/update if you need help updating.' %
3572 latest_version)
3573
e344693b 3574 def _setup_opener(self):
6ad14cab 3575 timeout_val = self.params.get('socket_timeout')
17bddf3e 3576 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3577
982ee69a 3578 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3579 opts_cookiefile = self.params.get('cookiefile')
3580 opts_proxy = self.params.get('proxy')
3581
982ee69a 3582 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3583
6a3f4c3f 3584 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3585 if opts_proxy is not None:
3586 if opts_proxy == '':
3587 proxies = {}
3588 else:
3589 proxies = {'http': opts_proxy, 'https': opts_proxy}
3590 else:
3591 proxies = compat_urllib_request.getproxies()
067aa17e 3592 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3593 if 'http' in proxies and 'https' not in proxies:
3594 proxies['https'] = proxies['http']
91410c9b 3595 proxy_handler = PerRequestProxyHandler(proxies)
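# e.g. with proxy 'socks5://127.0.0.1:9150' (hypothetical address), proxies becomes
# {'http': 'socks5://127.0.0.1:9150', 'https': 'socks5://127.0.0.1:9150'} and every
# request is routed through PerRequestProxyHandler accordingly.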
a0ddb8a2
PH
3596
3597 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3598 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3599 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3600 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3601 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3602
 3603 # When passing our own FileHandler instance, build_opener won't add the
 3604 # default FileHandler, which allows us to disable the file protocol since it
 3605 # can be used for malicious purposes (see
067aa17e 3606 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3607 file_handler = compat_urllib_request.FileHandler()
3608
3609 def file_open(*args, **kwargs):
7a5c1cfe 3610 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3611 file_handler.file_open = file_open
3612
3613 opener = compat_urllib_request.build_opener(
fca6dba8 3614 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3615
dca08720
PH
3616 # Delete the default user-agent header, which would otherwise apply in
3617 # cases where our custom HTTP handler doesn't come into play
067aa17e 3618 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3619 opener.addheaders = []
3620 self._opener = opener
62fec3b2
PH
3621
3622 def encode(self, s):
3623 if isinstance(s, bytes):
3624 return s # Already encoded
3625
3626 try:
3627 return s.encode(self.get_encoding())
3628 except UnicodeEncodeError as err:
3629 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3630 raise
3631
3632 def get_encoding(self):
3633 encoding = self.params.get('encoding')
3634 if encoding is None:
3635 encoding = preferredencoding()
3636 return encoding
ec82d85a 3637
e08a85d8 3638 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
80c03fa9 3639 ''' Write infojson and returns True = written, False = skip, None = error '''
e08a85d8 3640 if overwrite is None:
3641 overwrite = self.params.get('overwrites', True)
80c03fa9 3642 if not self.params.get('writeinfojson'):
3643 return False
3644 elif not infofn:
3645 self.write_debug(f'Skipping writing {label} infojson')
3646 return False
3647 elif not self._ensure_dir_exists(infofn):
3648 return None
e08a85d8 3649 elif not overwrite and os.path.exists(infofn):
80c03fa9 3650 self.to_screen(f'[info] {label.title()} metadata is already present')
3651 else:
3652 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3653 try:
3654 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3655 except (OSError, IOError):
3656 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3657 return None
3658 return True
3659
3660 def _write_description(self, label, ie_result, descfn):
3661 ''' Write description and returns True = written, False = skip, None = error '''
3662 if not self.params.get('writedescription'):
3663 return False
3664 elif not descfn:
3665 self.write_debug(f'Skipping writing {label} description')
3666 return False
3667 elif not self._ensure_dir_exists(descfn):
3668 return None
3669 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3670 self.to_screen(f'[info] {label.title()} description is already present')
3671 elif ie_result.get('description') is None:
3672 self.report_warning(f'There\'s no {label} description to write')
3673 return False
3674 else:
3675 try:
3676 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3677 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3678 descfile.write(ie_result['description'])
3679 except (OSError, IOError):
3680 self.report_error(f'Cannot write {label} description file {descfn}')
3681 return None
3682 return True
3683
3684 def _write_subtitles(self, info_dict, filename):
3685 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3686 ret = []
3687 subtitles = info_dict.get('requested_subtitles')
3688 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
 3689 # Subtitle download errors are already handled in the relevant IE,
 3690 # so this silently continues when the IE does not support subtitles
3691 return ret
3692
3693 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3694 if not sub_filename_base:
3695 self.to_screen('[info] Skipping writing video subtitles')
3696 return ret
3697 for sub_lang, sub_info in subtitles.items():
3698 sub_format = sub_info['ext']
3699 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3700 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3701 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3702 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3703 sub_info['filepath'] = sub_filename
3704 ret.append((sub_filename, sub_filename_final))
3705 continue
3706
3707 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3708 if sub_info.get('data') is not None:
3709 try:
3710 # Use newline='' to prevent conversion of newline characters
3711 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3712 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3713 subfile.write(sub_info['data'])
3714 sub_info['filepath'] = sub_filename
3715 ret.append((sub_filename, sub_filename_final))
3716 continue
3717 except (OSError, IOError):
3718 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3719 return None
3720
3721 try:
3722 sub_copy = sub_info.copy()
3723 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3724 self.dl(sub_filename, sub_copy, subtitle=True)
3725 sub_info['filepath'] = sub_filename
3726 ret.append((sub_filename, sub_filename_final))
3727 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3728 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3729 continue
519804a9 3730 return ret
80c03fa9 3731
3732 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3733 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3734 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3735 thumbnails, ret = [], []
6c4fd172 3736 if write_all or self.params.get('writethumbnail', False):
0202b52a 3737 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3738 multiple = write_all and len(thumbnails) > 1
ec82d85a 3739
80c03fa9 3740 if thumb_filename_base is None:
3741 thumb_filename_base = filename
3742 if thumbnails and not thumb_filename_base:
3743 self.write_debug(f'Skipping writing {label} thumbnail')
3744 return ret
3745
dd0228ce 3746 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3747 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3748 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3749 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3750 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3751
80c03fa9 3752 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3753 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3754 t['filepath'] = thumb_filename
aa9369a2 3755 self.to_screen('[info] %s is already present' % (
3756 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
ec82d85a 3757 else:
80c03fa9 3758 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a
PH
3759 try:
3760 uf = self.urlopen(t['url'])
80c03fa9 3761 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3762 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3763 shutil.copyfileobj(uf, thumbf)
80c03fa9 3764 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3765 t['filepath'] = thumb_filename
3158150c 3766 except network_exceptions as err:
dd0228ce 3767 thumbnails.pop(idx)
80c03fa9 3768 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3769 if ret and not write_all:
3770 break
0202b52a 3771 return ret