]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[youtube] Detect DRM better
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
b5ae35ee 11import functools
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
524e2e4f 28import unicodedata
8222d8de 29
ec11a9f4 30from enum import Enum
961ea474
S
31from string import ascii_letters
32
8c25f81b 33from .compat import (
82d8a8b6 34 compat_basestring,
4390d5ec 35 compat_brotli,
003c69a8 36 compat_get_terminal_size,
4f026faf 37 compat_kwargs,
d0d9ade4 38 compat_numeric_types,
e9c0cdd3 39 compat_os_name,
edf65256 40 compat_pycrypto_AES,
7d1eb38a 41 compat_shlex_quote,
ce02ed60 42 compat_str,
67134eab 43 compat_tokenize_tokenize,
ce02ed60
PH
44 compat_urllib_error,
45 compat_urllib_request,
8b172c2e 46 compat_urllib_request_DataHandler,
819e0531 47 windows_enable_vt_mode,
8c25f81b 48)
982ee69a 49from .cookies import load_cookies
8c25f81b 50from .utils import (
eedb7ba5
S
51 age_restricted,
52 args_to_str,
ce02ed60
PH
53 ContentTooShortError,
54 date_from_str,
55 DateRange,
acd69589 56 DEFAULT_OUTTMPL,
ce02ed60 57 determine_ext,
b5559424 58 determine_protocol,
48f79687 59 DownloadCancelled,
ce02ed60 60 DownloadError,
c0384f22 61 encode_compat_str,
ce02ed60 62 encodeFilename,
498f5606 63 EntryNotInPlaylist,
a06916d9 64 error_to_compat_str,
8b0d7497 65 ExistingVideoReached,
590bc6f6 66 expand_path,
ce02ed60 67 ExtractorError,
e29663c6 68 float_or_none,
02dbf93f 69 format_bytes,
76d321f6 70 format_field,
e0fd9573 71 format_decimal_suffix,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
0bb322b9 74 get_domain,
d5820461 75 has_certifi,
b0249bca 76 HEADRequest,
d37707bd 77 InAdvancePagedList,
c9969434 78 int_or_none,
732044af 79 iri_to_uri,
773f291d 80 ISO3166Utils,
34921b43 81 join_nonempty,
56a8fb4f 82 LazyList,
08438d2c 83 LINK_TEMPLATES,
ce02ed60 84 locked_file,
0202b52a 85 make_dir,
dca08720 86 make_HTTPS_handler,
ce02ed60 87 MaxDownloadsReached,
8b7539d2 88 merge_headers,
3158150c 89 network_exceptions,
5c3895ff 90 NO_DEFAULT,
ec11a9f4 91 number_of_digits,
cd6fc19e 92 orderedSet,
a06916d9 93 OUTTMPL_TYPES,
b7ab0590 94 PagedList,
083c9df9 95 parse_filesize,
91410c9b 96 PerRequestProxyHandler,
dca08720 97 platform_name,
d3c93ec2 98 Popen,
1e43a6f7 99 POSTPROCESS_WHEN,
eedb7ba5 100 PostProcessingError,
ce02ed60 101 preferredencoding,
eedb7ba5 102 prepend_extension,
f2ebc5c7 103 ReExtractInfo,
51fb4995 104 register_socks_protocols,
a06916d9 105 RejectedVideoReached,
3efb96a6 106 remove_terminal_sequences,
cfb56d1a 107 render_table,
eedb7ba5 108 replace_extension,
ce02ed60
PH
109 SameFileError,
110 sanitize_filename,
1bb5c511 111 sanitize_path,
dcf77cf1 112 sanitize_url,
67dda517 113 sanitized_Request,
e5660ee6 114 std_headers,
819e0531 115 STR_FORMAT_RE_TMPL,
116 STR_FORMAT_TYPES,
1211bb6d 117 str_or_none,
e29663c6 118 strftime_or_none,
ce02ed60 119 subtitles_filename,
819e0531 120 supports_terminal_sequences,
f2ebc5c7 121 timetuple_from_msec,
732044af 122 to_high_limit_path,
324ad820 123 traverse_obj,
6033d980 124 try_get,
ce02ed60 125 UnavailableVideoError,
29eb5174 126 url_basename,
7d1eb38a 127 variadic,
58b1f00d 128 version_tuple,
ce02ed60
PH
129 write_json_file,
130 write_string,
6a3f4c3f 131 YoutubeDLCookieProcessor,
dca08720 132 YoutubeDLHandler,
fca6dba8 133 YoutubeDLRedirectHandler,
ce02ed60 134)
a0e07d31 135from .cache import Cache
ec11a9f4 136from .minicurses import format_text
52a8a1e1 137from .extractor import (
138 gen_extractor_classes,
139 get_info_extractor,
140 _LAZY_LOADER,
3ae5e797 141 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 142)
4c54b89e 143from .extractor.openload import PhantomJSwrapper
52a8a1e1 144from .downloader import (
dbf5416a 145 FFmpegFD,
52a8a1e1 146 get_suitable_downloader,
147 shorten_protocol_name
148)
4c83c967 149from .downloader.rtmp import rtmpdump_version
4f026faf 150from .postprocessor import (
e36d50c5 151 get_postprocessor,
4e3b637d 152 EmbedThumbnailPP,
adbc4ec4 153 FFmpegFixupDuplicateMoovPP,
e36d50c5 154 FFmpegFixupDurationPP,
f17f8651 155 FFmpegFixupM3u8PP,
62cd676c 156 FFmpegFixupM4aPP,
6271f1ca 157 FFmpegFixupStretchedPP,
e36d50c5 158 FFmpegFixupTimestampPP,
4f026faf
PH
159 FFmpegMergerPP,
160 FFmpegPostProcessor,
0202b52a 161 MoveFilesAfterDownloadPP,
3ae5e797 162 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 163)
4c88ff87 164from .update import detect_variant
36eaf303 165from .version import __version__, RELEASE_GIT_HEAD
8222d8de 166
e9c0cdd3
YCH
167if compat_os_name == 'nt':
168 import ctypes
169
2459b6e1 170
8222d8de
JMF
171class YoutubeDL(object):
172 """YoutubeDL class.
173
174 YoutubeDL objects are the ones responsible of downloading the
175 actual video file and writing it to disk if the user has requested
176 it, among some other tasks. In most cases there should be one per
177 program. As, given a video URL, the downloader doesn't know how to
178 extract all the needed information, task that InfoExtractors do, it
179 has to pass the URL to one of them.
180
181 For this, YoutubeDL objects have a method that allows
182 InfoExtractors to be registered in a given order. When it is passed
183 a URL, the YoutubeDL object handles it to the first InfoExtractor it
184 finds that reports being able to handle it. The InfoExtractor extracts
185 all the information about the video or videos the URL refers to, and
186 YoutubeDL process the extracted information, possibly using a File
187 Downloader to download the video.
188
189 YoutubeDL objects accept a lot of parameters. In order not to saturate
190 the object constructor with arguments, it receives a dictionary of
191 options instead. These options are available through the params
192 attribute for the InfoExtractors to use. The YoutubeDL also
193 registers itself as the downloader in charge for the InfoExtractors
194 that are added to it, so this is a "mutual registration".
195
196 Available options:
197
198 username: Username for authentication purposes.
199 password: Password for authentication purposes.
180940e0 200 videopassword: Password for accessing a video.
1da50aa3
S
201 ap_mso: Adobe Pass multiple-system operator identifier.
202 ap_username: Multiple-system operator account username.
203 ap_password: Multiple-system operator account password.
8222d8de
JMF
204 usenetrc: Use netrc for authentication instead.
205 verbose: Print additional info to stdout.
206 quiet: Do not print messages to stdout.
ad8915b7 207 no_warnings: Do not print out anything for warnings.
bb66c247 208 forceprint: A dict with keys WHEN mapped to a list of templates to
209 print to stdout. The allowed keys are video or any of the
210 items in utils.POSTPROCESS_WHEN.
ca30f449 211 For compatibility, a single list is also accepted
bb66c247 212 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
213 a list of tuples with (template, filename)
53c18592 214 forceurl: Force printing final URL. (Deprecated)
215 forcetitle: Force printing title. (Deprecated)
216 forceid: Force printing ID. (Deprecated)
217 forcethumbnail: Force printing thumbnail URL. (Deprecated)
218 forcedescription: Force printing description. (Deprecated)
219 forcefilename: Force printing final filename. (Deprecated)
220 forceduration: Force printing duration. (Deprecated)
8694c600 221 forcejson: Force printing info_dict as JSON.
63e0be34
PH
222 dump_single_json: Force printing the info_dict of the whole playlist
223 (or video) as a single JSON line.
c25228e5 224 force_write_download_archive: Force writing download archive regardless
225 of 'skip_download' or 'simulate'.
b7b04c78 226 simulate: Do not download the video files. If unset (or None),
227 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 228 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 229 You can also pass a function. The function takes 'ctx' as
230 argument and returns the formats to download.
231 See "build_format_selector" for an implementation
63ad4d43 232 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 233 ignore_no_formats_error: Ignore "No video formats" error. Usefull for
234 extracting metadata even if the video is not actually
235 available for download (experimental)
0930b11f 236 format_sort: A list of fields by which to sort the video formats.
237 See "Sorting Formats" for more details.
c25228e5 238 format_sort_force: Force the given format_sort. see "Sorting Formats"
239 for more details.
08d30158 240 prefer_free_formats: Whether to prefer video formats with free containers
241 over non-free ones of same quality.
c25228e5 242 allow_multiple_video_streams: Allow multiple video streams to be merged
243 into a single file
244 allow_multiple_audio_streams: Allow multiple audio streams to be merged
245 into a single file
0ba692ac 246 check_formats Whether to test if the formats are downloadable.
9f1a1c36 247 Can be True (check all), False (check none),
248 'selected' (check selected formats),
0ba692ac 249 or None (check only if requested by extractor)
4524baf0 250 paths: Dictionary of output paths. The allowed keys are 'home'
251 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 252 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 253 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 254 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
255 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
256 restrictfilenames: Do not allow "&" and spaces in file names
257 trim_file_name: Limit length of filename (extension excluded)
4524baf0 258 windowsfilenames: Force the filenames to be windows compatible
b1940459 259 ignoreerrors: Do not stop on download/postprocessing errors.
260 Can be 'only_download' to ignore only download errors.
261 Default is 'only_download' for CLI, but False for API
26e2805c 262 skip_playlist_after_errors: Number of allowed failures until the rest of
263 the playlist is skipped
d22dec74 264 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 265 overwrites: Overwrite all video and metadata files if True,
266 overwrite only non-video files if None
267 and don't overwrite any file if False
34488702 268 For compatibility with youtube-dl,
269 "nooverwrites" may also be used instead
8222d8de
JMF
270 playliststart: Playlist item to start at.
271 playlistend: Playlist item to end at.
c14e88f0 272 playlist_items: Specific indices of playlist to download.
ff815fe6 273 playlistreverse: Download playlist items in reverse order.
75822ca7 274 playlistrandom: Download playlist items in random order.
8222d8de
JMF
275 matchtitle: Download only matching titles.
276 rejecttitle: Reject downloads for matching titles.
8bf9319e 277 logger: Log messages to a logging.Logger instance.
8222d8de 278 logtostderr: Log messages to stderr instead of stdout.
819e0531 279 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
280 writedescription: Write the video description to a .description file
281 writeinfojson: Write the video description to a .info.json file
75d43ca0 282 clean_infojson: Remove private fields from the infojson
34488702 283 getcomments: Extract video comments. This will not be written to disk
06167fbb 284 unless writeinfojson is also given
1fb07d10 285 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 286 writethumbnail: Write the thumbnail image to a file
c25228e5 287 allow_playlist_files: Whether to write playlists' description, infojson etc
288 also to disk when using the 'write*' options
ec82d85a 289 write_all_thumbnails: Write all thumbnail formats to files
732044af 290 writelink: Write an internet shortcut file, depending on the
291 current platform (.url/.webloc/.desktop)
292 writeurllink: Write a Windows internet shortcut file (.url)
293 writewebloclink: Write a macOS internet shortcut file (.webloc)
294 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 295 writesubtitles: Write the video subtitles to a file
741dd8ea 296 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 297 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 298 Downloads all the subtitles of the video
0b7f3118 299 (requires writesubtitles or writeautomaticsub)
8222d8de 300 listsubtitles: Lists all available subtitles for the video
a504ced0 301 subtitlesformat: The format code for subtitles
c32b0aab 302 subtitleslangs: List of languages of the subtitles to download (can be regex).
303 The list may contain "all" to refer to all the available
304 subtitles. The language can be prefixed with a "-" to
305 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
306 keepvideo: Keep the video file after post-processing
307 daterange: A DateRange object, download only if the upload_date is in the range.
308 skip_download: Skip the actual download of the video file
c35f9e72 309 cachedir: Location of the cache files in the filesystem.
a0e07d31 310 False to disable filesystem cache.
47192f92 311 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
312 age_limit: An integer representing the user's age in years.
313 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
314 min_views: An integer representing the minimum view count the video
315 must have in order to not be skipped.
316 Videos without view count information are always
317 downloaded. None for no limit.
318 max_views: An integer representing the maximum view count.
319 Videos that are more popular than that are not
320 downloaded.
321 Videos without view count information are always
322 downloaded. None for no limit.
323 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
324 Videos already present in the file are not downloaded
325 again.
8a51f564 326 break_on_existing: Stop the download process after attempting to download a
327 file that is in the archive.
328 break_on_reject: Stop the download process when encountering a video that
329 has been filtered out.
b222c271 330 break_per_url: Whether break_on_reject and break_on_existing
331 should act on each input URL as opposed to for the entire queue
8a51f564 332 cookiefile: File name where cookies should be read from and dumped to
f59f5ef8
MB
333 cookiesfrombrowser: A tuple containing the name of the browser, the profile
334 name/pathfrom where cookies are loaded, and the name of the
335 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
f81c62a6 336 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
337 support RFC 5746 secure renegotiation
f59f5ef8 338 nocheckcertificate: Do not verify SSL certificates
7e8c0af0
PH
339 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
340 At the moment, this is only supported by YouTube.
8b7539d2 341 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 342 proxy: URL of the proxy server to use
38cce791 343 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 344 on geo-restricted sites.
e344693b 345 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
346 bidi_workaround: Work around buggy terminals without bidirectional text
347 support, using fridibi
a0ddb8a2 348 debug_printtraffic:Print out sent and received HTTP traffic
91f071af 349 include_ads: Download ads as well (deprecated)
04b4d394
PH
350 default_search: Prepend this string if an input url is not valid.
351 'auto' for elaborate guessing
62fec3b2 352 encoding: Use this encoding instead of the system-specified.
e8ee972c 353 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
354 Pass in 'in_playlist' to only show this behavior for
355 playlist items.
f2ebc5c7 356 wait_for_video: If given, wait for scheduled streams to become available.
357 The value should be a tuple containing the range
358 (min_secs, max_secs) to wait between retries
4f026faf 359 postprocessors: A list of dictionaries, each with an entry
71b640cc 360 * key: The name of the postprocessor. See
7a5c1cfe 361 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 362 * when: When to run the postprocessor. Allowed values are
363 the entries of utils.POSTPROCESS_WHEN
56d868db 364 Assumed to be 'post_process' if not given
b5ae35ee 365 post_hooks: Deprecated - Register a custom postprocessor instead
366 A list of functions that get called as the final step
ab8e5e51
AM
367 for each video file, after all postprocessors have been
368 called. The filename will be passed as the only argument.
71b640cc
PH
369 progress_hooks: A list of functions that get called on download
370 progress, with a dictionary with the entries
5cda4eda 371 * status: One of "downloading", "error", or "finished".
ee69b99a 372 Check this first and ignore unknown values.
3ba7740d 373 * info_dict: The extracted info_dict
71b640cc 374
5cda4eda 375 If status is one of "downloading", or "finished", the
ee69b99a
PH
376 following properties may also be present:
377 * filename: The final filename (always present)
5cda4eda 378 * tmpfilename: The filename we're currently writing to
71b640cc
PH
379 * downloaded_bytes: Bytes on disk
380 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
381 * total_bytes_estimate: Guess of the eventual file size,
382 None if unavailable.
383 * elapsed: The number of seconds since download started.
71b640cc
PH
384 * eta: The estimated time in seconds, None if unknown
385 * speed: The download speed in bytes/second, None if
386 unknown
5cda4eda
PH
387 * fragment_index: The counter of the currently
388 downloaded video fragment.
389 * fragment_count: The number of fragments (= individual
390 files that will be merged)
71b640cc
PH
391
392 Progress hooks are guaranteed to be called at least once
393 (with status "finished") if the download is successful.
819e0531 394 postprocessor_hooks: A list of functions that get called on postprocessing
395 progress, with a dictionary with the entries
396 * status: One of "started", "processing", or "finished".
397 Check this first and ignore unknown values.
398 * postprocessor: Name of the postprocessor
399 * info_dict: The extracted info_dict
400
401 Progress hooks are guaranteed to be called at least twice
402 (with status "started" and "finished") if the processing is successful.
45598f15 403 merge_output_format: Extension to use when merging formats.
6b591b29 404 final_ext: Expected final extension; used to detect when the file was
59a7a13e 405 already downloaded and converted
6271f1ca
PH
406 fixup: Automatically correct known faults of the file.
407 One of:
408 - "never": do nothing
409 - "warn": only emit a warning
410 - "detect_or_warn": check whether we can do anything
62cd676c 411 about it, warn otherwise (default)
504f20dd 412 source_address: Client-side IP address to bind to.
6ec6cb4e 413 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 414 yt-dlp servers for debugging. (BROKEN)
1cf376f5 415 sleep_interval_requests: Number of seconds to sleep between requests
416 during extraction
7aa589a5
S
417 sleep_interval: Number of seconds to sleep before each download when
418 used alone or a lower bound of a range for randomized
419 sleep before each download (minimum possible number
420 of seconds to sleep) when used along with
421 max_sleep_interval.
422 max_sleep_interval:Upper bound of a range for randomized sleep before each
423 download (maximum possible number of seconds to sleep).
424 Must only be used along with sleep_interval.
425 Actual sleep time will be a random float from range
426 [sleep_interval; max_sleep_interval].
1cf376f5 427 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
428 listformats: Print an overview of available video formats and exit.
429 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
430 match_filter: A function that gets called with the info_dict of
431 every video.
432 If it returns a message, the video is ignored.
433 If it returns None, the video is downloaded.
434 match_filter_func in utils.py is one example for this.
7e5db8c9 435 no_color: Do not emit color codes in output.
0a840f58 436 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 437 HTTP header
0a840f58 438 geo_bypass_country:
773f291d
S
439 Two-letter ISO 3166-2 country code that will be used for
440 explicit geographic restriction bypassing via faking
504f20dd 441 X-Forwarded-For HTTP header
5f95927a
S
442 geo_bypass_ip_block:
443 IP range in CIDR notation that will be used similarly to
504f20dd 444 geo_bypass_country
71b640cc 445
85729c51 446 The following options determine which downloader is picked:
52a8a1e1 447 external_downloader: A dictionary of protocol keys and the executable of the
448 external downloader to use for it. The allowed protocols
449 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
450 Set the value to 'native' to use the native downloader
451 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
452 or {'m3u8': 'ffmpeg'} instead.
453 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
454 if True, otherwise use ffmpeg/avconv if False, otherwise
455 use downloader suggested by extractor if None.
53ed7066 456 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 457 The following options do not work when used through the API:
b5ae35ee 458 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 459 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 460 Refer __init__.py for their implementation
819e0531 461 progress_template: Dictionary of templates for progress outputs.
462 Allowed keys are 'download', 'postprocess',
463 'download-title' (console title) and 'postprocess-title'.
464 The template is mapped on a dictionary with keys 'progress' and 'info'
fe7e0c98 465
8222d8de 466 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 467 the downloader (see yt_dlp/downloader/common.py):
51d9739f 468 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654
EH
469 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
470 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 471 external_downloader_args, concurrent_fragment_downloads.
76b1bd67
JMF
472
473 The following options are used by the post processors:
d4a24f40 474 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 475 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
476 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
477 to the binary or its containing directory.
43820c03 478 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 479 and a list of additional command-line arguments for the
480 postprocessor/executable. The dict can also have "PP+EXE" keys
481 which are used when the given exe is used by the given PP.
482 Use 'default' as the name for arguments to passed to all PP
483 For compatibility with youtube-dl, a single list of args
484 can also be used
e409895f 485
486 The following options are used by the extractors:
62bff2c1 487 extractor_retries: Number of times to retry for known errors
488 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 489 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 490 discontinuities such as ad breaks (default: False)
5d3a0e79 491 extractor_args: A dictionary of arguments to be passed to the extractors.
492 See "EXTRACTOR ARGUMENTS" for details.
493 Eg: {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 494 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
5d3a0e79 495 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
496 If True (default), DASH manifests and related
62bff2c1 497 data will be downloaded and processed by extractor.
498 You can reduce network I/O by disabling it if you don't
499 care about DASH. (only for youtube)
5d3a0e79 500 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
501 If True (default), HLS manifests and related
62bff2c1 502 data will be downloaded and processed by extractor.
503 You can reduce network I/O by disabling it if you don't
504 care about HLS. (only for youtube)
8222d8de
JMF
505 """
506
c9969434
S
507 _NUMERIC_FIELDS = set((
508 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 509 'timestamp', 'release_timestamp',
c9969434
S
510 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
511 'average_rating', 'comment_count', 'age_limit',
512 'start_time', 'end_time',
513 'chapter_number', 'season_number', 'episode_number',
514 'track_number', 'disc_number', 'release_year',
c9969434
S
515 ))
516
6db9c4d5 517 _format_fields = {
518 # NB: Keep in sync with the docstring of extractor/common.py
519 'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
520 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
521 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
522 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
523 'preference', 'language', 'language_preference', 'quality', 'source_preference',
524 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
525 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
526 }
48ee10ee 527 _format_selection_exts = {
528 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
529 'video': {'mp4', 'flv', 'webm', '3gp'},
530 'storyboards': {'mhtml'},
531 }
532
3511266b 533 def __init__(self, params=None, auto_init=True):
883d4b1e 534 """Create a FileDownloader object with the given options.
535 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 536 Set to 'no_verbose_header' to not print the header
883d4b1e 537 """
e9f9a10f
JMF
538 if params is None:
539 params = {}
592b7485 540 self.params = params
8b7491c8 541 self._ies = {}
56c73665 542 self._ies_instances = {}
1e43a6f7 543 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 544 self._printed_messages = set()
1cf376f5 545 self._first_webpage_request = True
ab8e5e51 546 self._post_hooks = []
933605d7 547 self._progress_hooks = []
819e0531 548 self._postprocessor_hooks = []
8222d8de
JMF
549 self._download_retcode = 0
550 self._num_downloads = 0
9c906919 551 self._num_videos = 0
592b7485 552 self._playlist_level = 0
553 self._playlist_urls = set()
a0e07d31 554 self.cache = Cache(self)
34308b30 555
819e0531 556 windows_enable_vt_mode()
cf4f42cb 557 self._out_files = {
558 'error': sys.stderr,
559 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
560 'console': None if compat_os_name == 'nt' else next(
561 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
562 }
563 self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
ec11a9f4 564 self._allow_colors = {
cf4f42cb 565 type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
566 for type_ in ('screen', 'error')
ec11a9f4 567 }
819e0531 568
a61f4b28 569 if sys.version_info < (3, 6):
570 self.report_warning(
0181adef 571 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 572
88acdbc2 573 if self.params.get('allow_unplayable_formats'):
574 self.report_warning(
ec11a9f4 575 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 576 'This is a developer option intended for debugging. \n'
577 ' If you experience any issues while using this option, '
ec11a9f4 578 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 579
be5df5ee
S
580 def check_deprecated(param, option, suggestion):
581 if self.params.get(param) is not None:
53ed7066 582 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee
S
583 return True
584 return False
585
586 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
587 if self.params.get('geo_verification_proxy') is None:
588 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
589
0d1bb027 590 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
591 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 592 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 593
49a57e70 594 for msg in self.params.get('_warnings', []):
0d1bb027 595 self.report_warning(msg)
ee8dd27a 596 for msg in self.params.get('_deprecation_warnings', []):
597 self.deprecation_warning(msg)
0d1bb027 598
ec11a9f4 599 if 'list-formats' in self.params.get('compat_opts', []):
600 self.params['listformats_table'] = False
601
b5ae35ee 602 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 603 # nooverwrites was unnecessarily changed to overwrites
604 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
605 # This ensures compatibility with both keys
606 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 607 elif self.params.get('overwrites') is None:
608 self.params.pop('overwrites', None)
b868936c 609 else:
610 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 611
455a15e2 612 self.params.setdefault('forceprint', {})
613 self.params.setdefault('print_to_file', {})
bb66c247 614
615 # Compatibility with older syntax
ca30f449 616 if not isinstance(params['forceprint'], dict):
455a15e2 617 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 618
455a15e2 619 if self.params.get('bidi_workaround', False):
1c088fa8
PH
620 try:
621 import pty
622 master, slave = pty.openpty()
003c69a8 623 width = compat_get_terminal_size().columns
1c088fa8
PH
624 if width is None:
625 width_args = []
626 else:
627 width_args = ['-w', str(width)]
5d681e96 628 sp_kwargs = dict(
1c088fa8
PH
629 stdin=subprocess.PIPE,
630 stdout=slave,
cf4f42cb 631 stderr=self._out_files['error'])
5d681e96 632 try:
d3c93ec2 633 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
5d681e96 634 except OSError:
d3c93ec2 635 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
5d681e96 636 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 637 except OSError as ose:
66e7ace1 638 if ose.errno == errno.ENOENT:
49a57e70 639 self.report_warning(
640 'Could not find fribidi executable, ignoring --bidi-workaround. '
641 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
642 else:
643 raise
0783b09b 644
3089bc74
S
645 if (sys.platform != 'win32'
646 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 647 and not self.params.get('restrictfilenames', False)):
e9137224 648 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 649 self.report_warning(
6febd1c1 650 'Assuming --restrict-filenames since file system encoding '
1b725173 651 'cannot encode all characters. '
6febd1c1 652 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 653 self.params['restrictfilenames'] = True
34308b30 654
de6000d9 655 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 656
187986a8 657 # Creating format selector here allows us to catch syntax errors before the extraction
658 self.format_selector = (
fa9f30b8 659 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 660 else self.params['format'] if callable(self.params['format'])
187986a8 661 else self.build_format_selector(self.params['format']))
662
8b7539d2 663 # Set http_headers defaults according to std_headers
664 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
665
dca08720
PH
666 self._setup_opener()
667
3511266b 668 if auto_init:
883d4b1e 669 if auto_init != 'no_verbose_header':
670 self.print_debug_header()
3511266b
PH
671 self.add_default_info_extractors()
672
013b50b7 673 hooks = {
674 'post_hooks': self.add_post_hook,
675 'progress_hooks': self.add_progress_hook,
676 'postprocessor_hooks': self.add_postprocessor_hook,
677 }
678 for opt, fn in hooks.items():
679 for ph in self.params.get(opt, []):
680 fn(ph)
71b640cc 681
5bfc8bee 682 for pp_def_raw in self.params.get('postprocessors', []):
683 pp_def = dict(pp_def_raw)
684 when = pp_def.pop('when', 'post_process')
685 self.add_post_processor(
686 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
687 when=when)
688
51fb4995
YCH
689 register_socks_protocols()
690
ed39cac5 691 def preload_download_archive(fn):
692 """Preload the archive, if any is specified"""
693 if fn is None:
694 return False
49a57e70 695 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 696 try:
697 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
698 for line in archive_file:
699 self.archive.add(line.strip())
700 except IOError as ioe:
701 if ioe.errno != errno.ENOENT:
702 raise
703 return False
704 return True
705
706 self.archive = set()
707 preload_download_archive(self.params.get('download_archive'))
708
7d4111ed
PH
709 def warn_if_short_id(self, argv):
710 # short YouTube ID starting with dash?
711 idxs = [
712 i for i, a in enumerate(argv)
713 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
714 if idxs:
715 correct_argv = (
7a5c1cfe 716 ['yt-dlp']
3089bc74
S
717 + [a for i, a in enumerate(argv) if i not in idxs]
718 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
719 )
720 self.report_warning(
721 'Long argument string detected. '
49a57e70 722 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed
PH
723 args_to_str(correct_argv))
724
8222d8de
JMF
725 def add_info_extractor(self, ie):
726 """Add an InfoExtractor object to the end of the list."""
8b7491c8 727 ie_key = ie.ie_key()
728 self._ies[ie_key] = ie
e52d7f85 729 if not isinstance(ie, type):
8b7491c8 730 self._ies_instances[ie_key] = ie
e52d7f85 731 ie.set_downloader(self)
8222d8de 732
8b7491c8 733 def _get_info_extractor_class(self, ie_key):
734 ie = self._ies.get(ie_key)
735 if ie is None:
736 ie = get_info_extractor(ie_key)
737 self.add_info_extractor(ie)
738 return ie
739
56c73665
JMF
740 def get_info_extractor(self, ie_key):
741 """
742 Get an instance of an IE with name ie_key, it will try to get one from
743 the _ies list, if there's no instance it will create a new one and add
744 it to the extractor list.
745 """
746 ie = self._ies_instances.get(ie_key)
747 if ie is None:
748 ie = get_info_extractor(ie_key)()
749 self.add_info_extractor(ie)
750 return ie
751
023fa8c4
JMF
752 def add_default_info_extractors(self):
753 """
754 Add the InfoExtractors returned by gen_extractors to the end of the list
755 """
e52d7f85 756 for ie in gen_extractor_classes():
023fa8c4
JMF
757 self.add_info_extractor(ie)
758
56d868db 759 def add_post_processor(self, pp, when='post_process'):
8222d8de 760 """Add a PostProcessor object to the end of the chain."""
5bfa4862 761 self._pps[when].append(pp)
8222d8de
JMF
762 pp.set_downloader(self)
763
ab8e5e51
AM
764 def add_post_hook(self, ph):
765 """Add the post hook"""
766 self._post_hooks.append(ph)
767
933605d7 768 def add_progress_hook(self, ph):
819e0531 769 """Add the download progress hook"""
933605d7 770 self._progress_hooks.append(ph)
8ab470f1 771
819e0531 772 def add_postprocessor_hook(self, ph):
773 """Add the postprocessing progress hook"""
774 self._postprocessor_hooks.append(ph)
5bfc8bee 775 for pps in self._pps.values():
776 for pp in pps:
777 pp.add_progress_hook(ph)
819e0531 778
1c088fa8 779 def _bidi_workaround(self, message):
5d681e96 780 if not hasattr(self, '_output_channel'):
1c088fa8
PH
781 return message
782
5d681e96 783 assert hasattr(self, '_output_process')
11b85ce6 784 assert isinstance(message, compat_str)
6febd1c1
PH
785 line_count = message.count('\n') + 1
786 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 787 self._output_process.stdin.flush()
6febd1c1 788 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 789 for _ in range(line_count))
6febd1c1 790 return res[:-len('\n')]
1c088fa8 791
b35496d8 792 def _write_string(self, message, out=None, only_once=False):
793 if only_once:
794 if message in self._printed_messages:
795 return
796 self._printed_messages.add(message)
797 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 798
cf4f42cb 799 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 800 """Print message to stdout"""
cf4f42cb 801 if quiet is not None:
ae6a1b95 802 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
cf4f42cb 803 self._write_string(
804 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
805 self._out_files['print'])
806
807 def to_screen(self, message, skip_eol=False, quiet=None):
808 """Print message to screen if not in quiet mode"""
8bf9319e 809 if self.params.get('logger'):
43afe285 810 self.params['logger'].debug(message)
cf4f42cb 811 return
812 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
813 return
814 self._write_string(
815 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
816 self._out_files['screen'])
8222d8de 817
b35496d8 818 def to_stderr(self, message, only_once=False):
0760b0a7 819 """Print message to stderr"""
11b85ce6 820 assert isinstance(message, compat_str)
8bf9319e 821 if self.params.get('logger'):
43afe285
IB
822 self.params['logger'].error(message)
823 else:
cf4f42cb 824 self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
825
826 def _send_console_code(self, code):
827 if compat_os_name == 'nt' or not self._out_files['console']:
828 return
829 self._write_string(code, self._out_files['console'])
8222d8de 830
1e5b9a95
PH
831 def to_console_title(self, message):
832 if not self.params.get('consoletitle', False):
833 return
3efb96a6 834 message = remove_terminal_sequences(message)
4bede0d8
C
835 if compat_os_name == 'nt':
836 if ctypes.windll.kernel32.GetConsoleWindow():
837 # c_wchar_p() might not be necessary if `message` is
838 # already of type unicode()
839 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 840 else:
841 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 842
bdde425c 843 def save_console_title(self):
cf4f42cb 844 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 845 return
592b7485 846 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c
PH
847
848 def restore_console_title(self):
cf4f42cb 849 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 850 return
592b7485 851 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c
PH
852
853 def __enter__(self):
854 self.save_console_title()
855 return self
856
857 def __exit__(self, *args):
858 self.restore_console_title()
f89197d7 859
dca08720 860 if self.params.get('cookiefile') is not None:
1bab3437 861 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 862
fa9f30b8 863 def trouble(self, message=None, tb=None, is_error=True):
8222d8de
JMF
864 """Determine action to take when a download problem appears.
865
866 Depending on if the downloader has been configured to ignore
867 download errors or not, this method may throw an exception or
868 not when errors are found, after printing the message.
869
fa9f30b8 870 @param tb If given, is additional traceback information
871 @param is_error Whether to raise error according to ignorerrors
8222d8de
JMF
872 """
873 if message is not None:
874 self.to_stderr(message)
875 if self.params.get('verbose'):
876 if tb is None:
877 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 878 tb = ''
8222d8de 879 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 880 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 881 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
882 else:
883 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 884 tb = ''.join(tb_data)
c19bc311 885 if tb:
886 self.to_stderr(tb)
fa9f30b8 887 if not is_error:
888 return
b1940459 889 if not self.params.get('ignoreerrors'):
8222d8de
JMF
890 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
891 exc_info = sys.exc_info()[1].exc_info
892 else:
893 exc_info = sys.exc_info()
894 raise DownloadError(message, exc_info)
895 self._download_retcode = 1
896
ec11a9f4 897 class Styles(Enum):
898 HEADERS = 'yellow'
f304da8a 899 EMPHASIS = 'light blue'
ec11a9f4 900 ID = 'green'
901 DELIM = 'blue'
902 ERROR = 'red'
903 WARNING = 'yellow'
ff51ed58 904 SUPPRESS = 'light black'
ec11a9f4 905
7578d77d 906 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
ec11a9f4 907 if test_encoding:
908 original_text = text
5c104538 909 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
910 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 911 text = text.encode(encoding, 'ignore').decode(encoding)
912 if fallback is not None and text != original_text:
913 text = fallback
914 if isinstance(f, self.Styles):
f304da8a 915 f = f.value
7578d77d 916 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 917
918 def _format_screen(self, *args, **kwargs):
7578d77d 919 return self._format_text(
cf4f42cb 920 self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
ec11a9f4 921
922 def _format_err(self, *args, **kwargs):
7578d77d 923 return self._format_text(
cf4f42cb 924 self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
819e0531 925
c84aeac6 926 def report_warning(self, message, only_once=False):
8222d8de
JMF
927 '''
928 Print the message to stderr, it will be prefixed with 'WARNING:'
929 If stderr is a tty file the 'WARNING:' will be colored
930 '''
6d07ce01
JMF
931 if self.params.get('logger') is not None:
932 self.params['logger'].warning(message)
8222d8de 933 else:
ad8915b7
PH
934 if self.params.get('no_warnings'):
935 return
ec11a9f4 936 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 937
ee8dd27a 938 def deprecation_warning(self, message):
939 if self.params.get('logger') is not None:
940 self.params['logger'].warning('DeprecationWarning: {message}')
941 else:
942 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
943
fa9f30b8 944 def report_error(self, message, *args, **kwargs):
8222d8de
JMF
945 '''
946 Do the same as trouble, but prefixes the message with 'ERROR:', colored
947 in red if stderr is a tty file.
948 '''
fa9f30b8 949 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 950
b35496d8 951 def write_debug(self, message, only_once=False):
0760b0a7 952 '''Log debug message or Print message to stderr'''
953 if not self.params.get('verbose', False):
954 return
955 message = '[debug] %s' % message
956 if self.params.get('logger'):
957 self.params['logger'].debug(message)
958 else:
b35496d8 959 self.to_stderr(message, only_once)
0760b0a7 960
8222d8de
JMF
961 def report_file_already_downloaded(self, file_name):
962 """Report file has already been fully downloaded."""
963 try:
6febd1c1 964 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 965 except UnicodeEncodeError:
6febd1c1 966 self.to_screen('[download] The file has already been downloaded')
8222d8de 967
0c3d0f51 968 def report_file_delete(self, file_name):
969 """Report that existing file will be deleted."""
970 try:
c25228e5 971 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 972 except UnicodeEncodeError:
c25228e5 973 self.to_screen('Deleting existing file')
0c3d0f51 974
319b6059 975 def raise_no_formats(self, info, forced=False, *, msg=None):
1151c407 976 has_drm = info.get('__has_drm')
319b6059 977 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
978 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
979 if forced or not ignored:
1151c407 980 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 981 expected=has_drm or ignored or expected)
88acdbc2 982 else:
983 self.report_warning(msg)
984
de6000d9 985 def parse_outtmpl(self):
986 outtmpl_dict = self.params.get('outtmpl', {})
987 if not isinstance(outtmpl_dict, dict):
988 outtmpl_dict = {'default': outtmpl_dict}
71ce444a 989 # Remove spaces in the default template
990 if self.params.get('restrictfilenames'):
991 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
992 else:
993 sanitize = lambda x: x
de6000d9 994 outtmpl_dict.update({
71ce444a 995 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 996 if outtmpl_dict.get(k) is None})
de6000d9 997 for key, val in outtmpl_dict.items():
998 if isinstance(val, bytes):
999 self.report_warning(
1000 'Parameter outtmpl is bytes, but should be a unicode string. '
1001 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
1002 return outtmpl_dict
1003
21cd8fae 1004 def get_output_path(self, dir_type='', filename=None):
1005 paths = self.params.get('paths', {})
1006 assert isinstance(paths, dict)
1007 path = os.path.join(
1008 expand_path(paths.get('home', '').strip()),
1009 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1010 filename or '')
1011
1012 # Temporary fix for #4787
1013 # 'Treat' all problem characters by passing filename through preferredencoding
1014 # to workaround encoding issues with subprocess on python2 @ Windows
1015 if sys.version_info < (3, 0) and sys.platform == 'win32':
1016 path = encodeFilename(path, True).decode(preferredencoding())
1017 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1018
76a264ac 1019 @staticmethod
901130bb 1020 def _outtmpl_expandpath(outtmpl):
1021 # expand_path translates '%%' into '%' and '$$' into '$'
1022 # correspondingly that is not what we want since we need to keep
1023 # '%%' intact for template dict substitution step. Working around
1024 # with boundary-alike separator hack.
1025 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1026 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1027
1028 # outtmpl should be expand_path'ed before template dict substitution
1029 # because meta fields may contain env variables we don't want to
1030 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1031 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1032 return expand_path(outtmpl).replace(sep, '')
1033
1034 @staticmethod
1035 def escape_outtmpl(outtmpl):
1036 ''' Escape any remaining strings like %s, %abc% etc. '''
1037 return re.sub(
1038 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1039 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1040 outtmpl)
1041
1042 @classmethod
1043 def validate_outtmpl(cls, outtmpl):
76a264ac 1044 ''' @return None or Exception object '''
7d1eb38a 1045 outtmpl = re.sub(
37893bb0 1046 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
7d1eb38a 1047 lambda mobj: f'{mobj.group(0)[:-1]}s',
1048 cls._outtmpl_expandpath(outtmpl))
76a264ac 1049 try:
7d1eb38a 1050 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1051 return None
1052 except ValueError as err:
1053 return err
1054
03b4de72 1055 @staticmethod
1056 def _copy_infodict(info_dict):
1057 info_dict = dict(info_dict)
09b49e1f 1058 info_dict.pop('__postprocessors', None)
03b4de72 1059 return info_dict
1060
e0fd9573 1061 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1062 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1063 @param sanitize Whether to sanitize the output as a filename.
1064 For backward compatibility, a function can also be passed
1065 """
1066
6e84b215 1067 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1068
03b4de72 1069 info_dict = self._copy_infodict(info_dict)
752cda38 1070 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1071 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1072 if info_dict.get('duration', None) is not None
1073 else None)
752cda38 1074 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
9c906919 1075 info_dict['video_autonumber'] = self._num_videos
752cda38 1076 if info_dict.get('resolution') is None:
1077 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1078
e6f21b3d 1079 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1080 # of %(field)s to %(field)0Nd for backward compatibility
1081 field_size_compat_map = {
ec11a9f4 1082 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1083 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1084 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1085 }
752cda38 1086
385a27fa 1087 TMPL_DICT = {}
37893bb0 1088 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
385a27fa 1089 MATH_FUNCTIONS = {
1090 '+': float.__add__,
1091 '-': float.__sub__,
1092 }
e625be0d 1093 # Field is of the form key1.key2...
1094 # where keys (except first) can be string, int or slice
2b8a2973 1095 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
e0fd9573 1096 MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
385a27fa 1097 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 1098 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1099 (?P<negate>-)?
385a27fa 1100 (?P<fields>{field})
1101 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 1102 (?:>(?P<strf_format>.+?))?
34baa9fd 1103 (?P<remaining>
1104 (?P<alternate>(?<!\\),[^|&)]+)?
1105 (?:&(?P<replacement>.*?))?
1106 (?:\|(?P<default>.*?))?
1107 )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
752cda38 1108
2b8a2973 1109 def _traverse_infodict(k):
1110 k = k.split('.')
1111 if k[0] == '':
1112 k.pop(0)
1113 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1114
752cda38 1115 def get_value(mdict):
1116 # Object traversal
2b8a2973 1117 value = _traverse_infodict(mdict['fields'])
752cda38 1118 # Negative
1119 if mdict['negate']:
1120 value = float_or_none(value)
1121 if value is not None:
1122 value *= -1
1123 # Do maths
385a27fa 1124 offset_key = mdict['maths']
1125 if offset_key:
752cda38 1126 value = float_or_none(value)
1127 operator = None
385a27fa 1128 while offset_key:
1129 item = re.match(
1130 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1131 offset_key).group(0)
1132 offset_key = offset_key[len(item):]
1133 if operator is None:
752cda38 1134 operator = MATH_FUNCTIONS[item]
385a27fa 1135 continue
1136 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1137 offset = float_or_none(item)
1138 if offset is None:
2b8a2973 1139 offset = float_or_none(_traverse_infodict(item))
385a27fa 1140 try:
1141 value = operator(value, multiplier * offset)
1142 except (TypeError, ZeroDivisionError):
1143 return None
1144 operator = None
752cda38 1145 # Datetime formatting
1146 if mdict['strf_format']:
7c37ff97 1147 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1148
1149 return value
1150
b868936c 1151 na = self.params.get('outtmpl_na_placeholder', 'NA')
1152
e0fd9573 1153 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1154 return sanitize_filename(str(value), restricted=restricted, is_id=(
1155 bool(re.search(r'(^|[_.])id(\.|$)', key))
1156 if 'filename-sanitization' in self.params.get('compat_opts', [])
1157 else NO_DEFAULT))
e0fd9573 1158
1159 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1160 sanitize = bool(sanitize)
1161
6e84b215 1162 def _dumpjson_default(obj):
1163 if isinstance(obj, (set, LazyList)):
1164 return list(obj)
adbc4ec4 1165 return repr(obj)
6e84b215 1166
752cda38 1167 def create_key(outer_mobj):
1168 if not outer_mobj.group('has_key'):
b836dc94 1169 return outer_mobj.group(0)
752cda38 1170 key = outer_mobj.group('key')
752cda38 1171 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1172 initial_field = mobj.group('fields') if mobj else ''
e978789f 1173 value, replacement, default = None, None, na
7c37ff97 1174 while mobj:
e625be0d 1175 mobj = mobj.groupdict()
7c37ff97 1176 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1177 value = get_value(mobj)
e978789f 1178 replacement = mobj['replacement']
7c37ff97 1179 if value is None and mobj['alternate']:
34baa9fd 1180 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1181 else:
1182 break
752cda38 1183
b868936c 1184 fmt = outer_mobj.group('format')
752cda38 1185 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1186 fmt = '0{:d}d'.format(field_size_compat_map[key])
1187
e978789f 1188 value = default if value is None else value if replacement is None else replacement
752cda38 1189
4476d2c7 1190 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1191 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1192 if fmt[-1] == 'l': # list
4476d2c7 1193 delim = '\n' if '#' in flags else ', '
9e907ebd 1194 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1195 elif fmt[-1] == 'j': # json
4476d2c7 1196 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1197 elif fmt[-1] == 'q': # quoted
4476d2c7 1198 value = map(str, variadic(value) if '#' in flags else [value])
1199 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1200 elif fmt[-1] == 'B': # bytes
f5aa5cfb 1201 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1202 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1203 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1204 value, fmt = unicodedata.normalize(
1205 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1206 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1207 value), str_fmt
e0fd9573 1208 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1209 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1210 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1211 factor=1024 if '#' in flags else 1000)
37893bb0 1212 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1213 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1214 elif fmt[-1] == 'c':
524e2e4f 1215 if value:
1216 value = str(value)[0]
76a264ac 1217 else:
524e2e4f 1218 fmt = str_fmt
76a264ac 1219 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1220 value = float_or_none(value)
752cda38 1221 if value is None:
1222 value, fmt = default, 's'
901130bb 1223
752cda38 1224 if sanitize:
1225 if fmt[-1] == 'r':
1226 # If value is an object, sanitize might convert it to a string
1227 # So we convert it to repr first
7d1eb38a 1228 value, fmt = repr(value), str_fmt
639f1cea 1229 if fmt[-1] in 'csr':
e0fd9573 1230 value = sanitizer(initial_field, value)
901130bb 1231
b868936c 1232 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1233 TMPL_DICT[key] = value
b868936c 1234 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1235
385a27fa 1236 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1237
819e0531 1238 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1239 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1240 return self.escape_outtmpl(outtmpl) % info_dict
1241
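        # A rough sketch of how the template machinery above behaves (illustrative
        # values, assuming a YoutubeDL instance `ydl`; not part of the original file):
        #   info = {'id': 'abc123', 'title': 'An example video', 'ext': 'mp4'}
        #   ydl.evaluate_outtmpl('%(title).10s [%(id)s].%(ext)s', info)
        #   # -> 'An example [abc123].mp4'
        # The extra conversions handled in create_key (l, j, q, B, U, D, S, c) are
        # applied before the ordinary printf-style formatting.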
de6000d9 1242 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1243 try:
b836dc94 1244 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
e0fd9573 1245 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1246 if not filename:
1247 return None
15da37c7 1248
6a0546e3 1249 if tmpl_type in ('default', 'temp'):
1250 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1251 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1252 filename = replace_extension(filename, ext, final_ext)
1253 else:
1254 force_ext = OUTTMPL_TYPES[tmpl_type]
1255 if force_ext:
1256 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1257
bdc3fd2f
U
1258 # https://github.com/blackjack4494/youtube-dlc/issues/85
1259 trim_file_name = self.params.get('trim_file_name', False)
1260 if trim_file_name:
5c22c63d 1261 no_ext, *ext = filename.rsplit('.', 2)
1262 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1263
0202b52a 1264 return filename
8222d8de 1265 except ValueError as err:
6febd1c1 1266 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1267 return None
1268
de6000d9 1269 def prepare_filename(self, info_dict, dir_type='', warn=False):
1270 """Generate the output filename."""
21cd8fae 1271
de6000d9 1272 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1273 if not filename and dir_type not in ('', 'temp'):
1274 return ''
de6000d9 1275
c84aeac6 1276 if warn:
21cd8fae 1277 if not self.params.get('paths'):
de6000d9 1278 pass
1279 elif filename == '-':
c84aeac6 1280 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1281 elif os.path.isabs(filename):
c84aeac6 1282 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1283 if filename == '-' or not filename:
1284 return filename
1285
21cd8fae 1286 return self.get_output_path(dir_type, filename)
0202b52a 1287
120fe513 1288 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1289 """ Returns None if the file should be downloaded """
8222d8de 1290
c77495e3 1291 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1292
8b0d7497 1293 def check_filter():
8b0d7497 1294 if 'title' in info_dict:
1295 # This can happen when we're just evaluating the playlist
1296 title = info_dict['title']
1297 matchtitle = self.params.get('matchtitle', False)
1298 if matchtitle:
1299 if not re.search(matchtitle, title, re.IGNORECASE):
1300 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1301 rejecttitle = self.params.get('rejecttitle', False)
1302 if rejecttitle:
1303 if re.search(rejecttitle, title, re.IGNORECASE):
1304 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1305 date = info_dict.get('upload_date')
1306 if date is not None:
1307 dateRange = self.params.get('daterange', DateRange())
1308 if date not in dateRange:
1309 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1310 view_count = info_dict.get('view_count')
1311 if view_count is not None:
1312 min_views = self.params.get('min_views')
1313 if min_views is not None and view_count < min_views:
1314 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1315 max_views = self.params.get('max_views')
1316 if max_views is not None and view_count > max_views:
1317 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1318 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1319 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1320
8f18aca8 1321 match_filter = self.params.get('match_filter')
1322 if match_filter is not None:
1323 try:
1324 ret = match_filter(info_dict, incomplete=incomplete)
1325 except TypeError:
1326 # For backward compatibility
1327 ret = None if incomplete else match_filter(info_dict)
1328 if ret is not None:
1329 return ret
8b0d7497 1330 return None
1331
c77495e3 1332 if self.in_download_archive(info_dict):
1333 reason = '%s has already been recorded in the archive' % video_title
1334 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1335 else:
1336 reason = check_filter()
1337 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1338 if reason is not None:
120fe513 1339 if not silent:
1340 self.to_screen('[download] ' + reason)
c77495e3 1341 if self.params.get(break_opt, False):
1342 raise break_err()
8b0d7497 1343 return reason
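        # Sketch of a user-supplied match_filter compatible with the call inside
        # check_filter() above (hypothetical function; returning a string rejects the
        # video with that string as the reason, returning None lets other checks run):
        #   def my_filter(info_dict, *, incomplete=False):
        #       if incomplete:
        #           return None  # metadata not fully extracted yet; decide later
        #       if (info_dict.get('duration') or 0) < 60:
        #           return 'Skipping short video'
        #       return None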
fe7e0c98 1344
b6c45014
JMF
1345 @staticmethod
1346 def add_extra_info(info_dict, extra_info):
1347 '''Set the keys from extra_info in info dict if they are missing'''
1348 for key, value in extra_info.items():
1349 info_dict.setdefault(key, value)
1350
409e1828 1351 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1352 process=True, force_generic_extractor=False):
41d1cca3 1353 """
1354 Return the extracted (and, if 'process' is True, fully resolved) info dict for the URL.
1355
1356 Arguments:
1357 url -- URL to extract
1358
1359 Keyword arguments:
1360 download -- whether to download videos during extraction
1361 ie_key -- extractor key hint
1362 extra_info -- dictionary containing the extra values to add to each result
1363 process -- whether to resolve all unresolved references (URLs, playlist items),
1364 must be True for download to work.
1365 force_generic_extractor -- force using the generic extractor
1366 """
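        # Typical invocation (a sketch, not from the original file):
        #   self.extract_info('https://example.com/watch?v=abc123', download=False)
        # With process=True (the default) the result is resolved through
        # process_ie_result() below before being returned.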
fe7e0c98 1367
409e1828 1368 if extra_info is None:
1369 extra_info = {}
1370
61aa5ba3 1371 if not ie_key and force_generic_extractor:
d22dec74
S
1372 ie_key = 'Generic'
1373
8222d8de 1374 if ie_key:
8b7491c8 1375 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1376 else:
1377 ies = self._ies
1378
8b7491c8 1379 for ie_key, ie in ies.items():
8222d8de
JMF
1380 if not ie.suitable(url):
1381 continue
1382
1383 if not ie.working():
6febd1c1
PH
1384 self.report_warning('The program functionality for this site has been marked as broken, '
1385 'and will probably not work.')
8222d8de 1386
1151c407 1387 temp_id = ie.get_temp_id(url)
a0566bbf 1388 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1389 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1390 if self.params.get('break_on_existing', False):
1391 raise ExistingVideoReached()
a0566bbf 1392 break
8b7491c8 1393 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1394 else:
1395 self.report_error('no suitable InfoExtractor for URL %s' % url)
1396
8e5fecc8 1397 def __handle_extraction_exceptions(func):
b5ae35ee 1398 @functools.wraps(func)
a0566bbf 1399 def wrapper(self, *args, **kwargs):
6da22e7d 1400 while True:
1401 try:
1402 return func(self, *args, **kwargs)
1403 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1404 raise
6da22e7d 1405 except ReExtractInfo as e:
1406 if e.expected:
1407 self.to_screen(f'{e}; Re-extracting data')
1408 else:
1409 self.to_stderr('\r')
1410 self.report_warning(f'{e}; Re-extracting data')
1411 continue
1412 except GeoRestrictedError as e:
1413 msg = e.msg
1414 if e.countries:
1415 msg += '\nThis video is available in %s.' % ', '.join(
1416 map(ISO3166Utils.short2full, e.countries))
1417 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1418 self.report_error(msg)
1419 except ExtractorError as e: # An error we somewhat expected
1420 self.report_error(str(e), e.format_traceback())
1421 except Exception as e:
1422 if self.params.get('ignoreerrors'):
1423 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1424 else:
1425 raise
1426 break
a0566bbf 1427 return wrapper
1428
f2ebc5c7 1429 def _wait_for_video(self, ie_result):
1430 if (not self.params.get('wait_for_video')
1431 or ie_result.get('_type', 'video') != 'video'
1432 or ie_result.get('formats') or ie_result.get('url')):
1433 return
1434
1435 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1436 last_msg = ''
1437
1438 def progress(msg):
1439 nonlocal last_msg
1440 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1441 last_msg = msg
1442
1443 min_wait, max_wait = self.params.get('wait_for_video')
1444 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1445 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1446 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1447 self.report_warning('Release time of video is not known')
1448 elif (diff or 0) <= 0:
1449 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1450 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1451 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1452
1453 wait_till = time.time() + diff
1454 try:
1455 while True:
1456 diff = wait_till - time.time()
1457 if diff <= 0:
1458 progress('')
1459 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1460 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1461 time.sleep(1)
1462 except KeyboardInterrupt:
1463 progress('')
1464 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1465 except BaseException as e:
1466 if not isinstance(e, ReExtractInfo):
1467 self.to_screen('')
1468 raise
1469
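    # For example, with params {'wait_for_video': (60, 600)} an upcoming live whose
    # release time is unknown is re-extracted after a random delay between 60 and
    # 600 seconds, per _wait_for_video above (illustrative numbers only).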
a0566bbf 1470 @__handle_extraction_exceptions
58f197b7 1471 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1472 ie_result = ie.extract(url)
1473 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1474 return
1475 if isinstance(ie_result, list):
1476 # Backwards compatibility: old IE result format
1477 ie_result = {
1478 '_type': 'compat_list',
1479 'entries': ie_result,
1480 }
e37d0efb 1481 if extra_info.get('original_url'):
1482 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1483 self.add_default_extra_info(ie_result, ie, url)
1484 if process:
f2ebc5c7 1485 self._wait_for_video(ie_result)
a0566bbf 1486 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1487 else:
a0566bbf 1488 return ie_result
fe7e0c98 1489
ea38e55f 1490 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1491 if url is not None:
1492 self.add_extra_info(ie_result, {
1493 'webpage_url': url,
1494 'original_url': url,
57ebfca3 1495 })
1496 webpage_url = ie_result.get('webpage_url')
1497 if webpage_url:
1498 self.add_extra_info(ie_result, {
1499 'webpage_url_basename': url_basename(webpage_url),
1500 'webpage_url_domain': get_domain(webpage_url),
6033d980 1501 })
1502 if ie is not None:
1503 self.add_extra_info(ie_result, {
1504 'extractor': ie.IE_NAME,
1505 'extractor_key': ie.ie_key(),
1506 })
ea38e55f 1507
58adec46 1508 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1509 """
1510 Take the result of the ie (may be modified) and resolve all unresolved
1511 references (URLs, playlist items).
1512
1513 It will also download the videos if 'download' is True.
1514 Returns the resolved ie_result.
1515 """
58adec46 1516 if extra_info is None:
1517 extra_info = {}
e8ee972c
PH
1518 result_type = ie_result.get('_type', 'video')
1519
057a5206 1520 if result_type in ('url', 'url_transparent'):
134c6ea8 1521 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1522 if ie_result.get('original_url'):
1523 extra_info.setdefault('original_url', ie_result['original_url'])
1524
057a5206 1525 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1526 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1527 or extract_flat is True):
ecb54191 1528 info_copy = ie_result.copy()
6033d980 1529 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1530 if ie and not ie_result.get('id'):
4614bc22 1531 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1532 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1533 self.add_extra_info(info_copy, extra_info)
b5475f11 1534 info_copy, _ = self.pre_process(info_copy)
ecb54191 1535 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1536 if self.params.get('force_write_download_archive', False):
1537 self.record_download_archive(info_copy)
e8ee972c
PH
1538 return ie_result
1539
8222d8de 1540 if result_type == 'video':
b6c45014 1541 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1542 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1543 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1544 if additional_urls:
e9f4ccd1 1545 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1546 if isinstance(additional_urls, compat_str):
1547 additional_urls = [additional_urls]
1548 self.to_screen(
1549 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1550 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1551 ie_result['additional_entries'] = [
1552 self.extract_info(
b69fd25c 1553 url, download, extra_info=extra_info,
9c2b75b5 1554 force_generic_extractor=self.params.get('force_generic_extractor'))
1555 for url in additional_urls
1556 ]
1557 return ie_result
8222d8de
JMF
1558 elif result_type == 'url':
1559 # We have to add extra_info to the results because it may be
1560 # contained in a playlist
07cce701 1561 return self.extract_info(
1562 ie_result['url'], download,
1563 ie_key=ie_result.get('ie_key'),
1564 extra_info=extra_info)
7fc3fa05
PH
1565 elif result_type == 'url_transparent':
1566 # Use the information from the embedding page
1567 info = self.extract_info(
1568 ie_result['url'], ie_key=ie_result.get('ie_key'),
1569 extra_info=extra_info, download=False, process=False)
1570
1640eb09
S
1571 # extract_info may return None when ignoreerrors is enabled and
1572 # extraction failed with an error, don't crash and return early
1573 # in this case
1574 if not info:
1575 return info
1576
412c617d
PH
1577 force_properties = dict(
1578 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1579 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1580 if f in force_properties:
1581 del force_properties[f]
1582 new_result = info.copy()
1583 new_result.update(force_properties)
7fc3fa05 1584
0563f7ac
S
1585 # Extracted info may not be a video result (i.e.
1586 # info.get('_type', 'video') != video) but rather an url or
1587 # url_transparent. In such cases outer metadata (from ie_result)
1588 # should be propagated to inner one (info). For this to happen
1589 # _type of info should be overridden with url_transparent. This
067aa17e 1590 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1591 if new_result.get('_type') == 'url':
1592 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1593
1594 return self.process_ie_result(
1595 new_result, download=download, extra_info=extra_info)
40fcba5e 1596 elif result_type in ('playlist', 'multi_video'):
30a074c2 1597 # Protect from infinite recursion due to recursively nested playlists
1598 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1599 webpage_url = ie_result['webpage_url']
1600 if webpage_url in self._playlist_urls:
7e85e872 1601 self.to_screen(
30a074c2 1602 '[download] Skipping already downloaded playlist: %s'
1603 % (ie_result.get('title') or ie_result.get('id')))
1604 return
7e85e872 1605
30a074c2 1606 self._playlist_level += 1
1607 self._playlist_urls.add(webpage_url)
03f83004 1608 self._fill_common_fields(ie_result, False)
bc516a3f 1609 self._sanitize_thumbnails(ie_result)
30a074c2 1610 try:
1611 return self.__process_playlist(ie_result, download)
1612 finally:
1613 self._playlist_level -= 1
1614 if not self._playlist_level:
1615 self._playlist_urls.clear()
8222d8de 1616 elif result_type == 'compat_list':
c9bf4114
PH
1617 self.report_warning(
1618 'Extractor %s returned a compat_list result. '
1619 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1620
8222d8de 1621 def _fixup(r):
b868936c 1622 self.add_extra_info(r, {
1623 'extractor': ie_result['extractor'],
1624 'webpage_url': ie_result['webpage_url'],
1625 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1626 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1627 'extractor_key': ie_result['extractor_key'],
1628 })
8222d8de
JMF
1629 return r
1630 ie_result['entries'] = [
b6c45014 1631 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1632 for r in ie_result['entries']
1633 ]
1634 return ie_result
1635 else:
1636 raise Exception('Invalid result type: %s' % result_type)
1637
e92caff5 1638 def _ensure_dir_exists(self, path):
1639 return make_dir(path, self.report_error)
1640
3b603dbd 1641 @staticmethod
1642 def _playlist_infodict(ie_result, **kwargs):
1643 return {
1644 **ie_result,
1645 'playlist': ie_result.get('title') or ie_result.get('id'),
1646 'playlist_id': ie_result.get('id'),
1647 'playlist_title': ie_result.get('title'),
1648 'playlist_uploader': ie_result.get('uploader'),
1649 'playlist_uploader_id': ie_result.get('uploader_id'),
1650 'playlist_index': 0,
1651 **kwargs,
1652 }
1653
30a074c2 1654 def __process_playlist(self, ie_result, download):
1655 # We process each entry in the playlist
1656 playlist = ie_result.get('title') or ie_result.get('id')
1657 self.to_screen('[download] Downloading playlist: %s' % playlist)
1658
498f5606 1659 if 'entries' not in ie_result:
aa9369a2 1660 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1661
1662 MissingEntry = object()
498f5606 1663 incomplete_entries = bool(ie_result.get('requested_entries'))
1664 if incomplete_entries:
bf5f605e 1665 def fill_missing_entries(entries, indices):
7c7f7161 1666 ret = [MissingEntry] * max(indices)
bf5f605e 1667 for i, entry in zip(indices, entries):
498f5606 1668 ret[i - 1] = entry
1669 return ret
1670 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1671
30a074c2 1672 playlist_results = []
1673
56a8fb4f 1674 playliststart = self.params.get('playliststart', 1)
30a074c2 1675 playlistend = self.params.get('playlistend')
1676 # For backwards compatibility, interpret -1 as whole list
1677 if playlistend == -1:
1678 playlistend = None
1679
1680 playlistitems_str = self.params.get('playlist_items')
1681 playlistitems = None
1682 if playlistitems_str is not None:
1683 def iter_playlistitems(format):
1684 for string_segment in format.split(','):
1685 if '-' in string_segment:
1686 start, end = string_segment.split('-')
1687 for item in range(int(start), int(end) + 1):
1688 yield int(item)
1689 else:
1690 yield int(string_segment)
1691 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
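            # e.g. '--playlist-items 1-3,7' gives playlistitems == [1, 2, 3, 7];
            # duplicates are dropped by orderedSet (worked example of the parsing above)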
1692
1693 ie_entries = ie_result['entries']
8e5fecc8 1694 if isinstance(ie_entries, list):
ed8d87f9 1695 playlist_count = len(ie_entries)
f0d785d3 1696 msg = f'Collected {playlist_count} videos; downloading %d of them'
1697 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1698
8e5fecc8 1699 def get_entry(i):
1700 return ie_entries[i - 1]
1701 else:
f0d785d3 1702 msg = 'Downloading %d videos'
c586f9e8 1703 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1704 ie_entries = LazyList(ie_entries)
d37707bd 1705 elif isinstance(ie_entries, InAdvancePagedList):
1706 if ie_entries._pagesize == 1:
1707 playlist_count = ie_entries._pagecount
8e5fecc8 1708
1709 def get_entry(i):
1710 return YoutubeDL.__handle_extraction_exceptions(
1711 lambda self, i: ie_entries[i - 1]
1712 )(self, i)
50fed816 1713
f0d785d3 1714 entries, broken = [], False
ff1c7fc9 1715 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1716 for i in items:
1717 if i == 0:
1718 continue
56a8fb4f 1719 if playlistitems is None and playlistend is not None and playlistend < i:
1720 break
1721 entry = None
1722 try:
50fed816 1723 entry = get_entry(i)
7c7f7161 1724 if entry is MissingEntry:
498f5606 1725 raise EntryNotInPlaylist()
56a8fb4f 1726 except (IndexError, EntryNotInPlaylist):
1727 if incomplete_entries:
aa9369a2 1728 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1729 elif not playlistitems:
1730 break
1731 entries.append(entry)
120fe513 1732 try:
1733 if entry is not None:
1734 self._match_entry(entry, incomplete=True, silent=True)
1735 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1736 broken = True
120fe513 1737 break
56a8fb4f 1738 ie_result['entries'] = entries
30a074c2 1739
56a8fb4f 1740 # Save playlist_index before re-ordering
1741 entries = [
9e598870 1742 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1743 for i, entry in enumerate(entries, 1)
1744 if entry is not None]
1745 n_entries = len(entries)
498f5606 1746
f0d785d3 1747 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1748 ie_result['playlist_count'] = n_entries
1749
e08a85d8 1750 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1751 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1752 ie_result['requested_entries'] = playlistitems
1753
e08a85d8 1754 _infojson_written = False
0bfc53d0 1755 write_playlist_files = self.params.get('allow_playlist_files', True)
1756 if write_playlist_files and self.params.get('list_thumbnails'):
1757 self.list_thumbnails(ie_result)
1758 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1759 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1760 _infojson_written = self._write_info_json(
1761 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1762 if _infojson_written is None:
80c03fa9 1763 return
1764 if self._write_description('playlist', ie_result,
1765 self.prepare_filename(ie_copy, 'pl_description')) is None:
1766 return
681de68e 1767 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1768 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1769
1770 if self.params.get('playlistreverse', False):
1771 entries = entries[::-1]
30a074c2 1772 if self.params.get('playlistrandom', False):
1773 random.shuffle(entries)
1774
1775 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1776
56a8fb4f 1777 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1778 failures = 0
1779 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1780 for i, entry_tuple in enumerate(entries, 1):
1781 playlist_index, entry = entry_tuple
81139999 1782 if 'playlist-index' in self.params.get('compat_opts', []):
1783 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1784 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1785 # This __x_forwarded_for_ip thing is a bit ugly but requires
1786 # minimal changes
1787 if x_forwarded_for:
1788 entry['__x_forwarded_for_ip'] = x_forwarded_for
1789 extra = {
1790 'n_entries': n_entries,
f59ae581 1791 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1792 'playlist_count': ie_result.get('playlist_count'),
71729754 1793 'playlist_index': playlist_index,
1794 'playlist_autonumber': i,
30a074c2 1795 'playlist': playlist,
1796 'playlist_id': ie_result.get('id'),
1797 'playlist_title': ie_result.get('title'),
1798 'playlist_uploader': ie_result.get('uploader'),
1799 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1800 'extractor': ie_result['extractor'],
1801 'webpage_url': ie_result['webpage_url'],
1802 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1803 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1804 'extractor_key': ie_result['extractor_key'],
1805 }
1806
1807 if self._match_entry(entry, incomplete=True) is not None:
1808 continue
1809
1810 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1811 if not entry_result:
1812 failures += 1
1813 if failures >= max_failures:
1814 self.report_error(
1815 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1816 break
30a074c2 1817 playlist_results.append(entry_result)
1818 ie_result['entries'] = playlist_results
e08a85d8 1819
1820 # Write the updated info to json
1821 if _infojson_written and self._write_info_json(
1822 'updated playlist', ie_result,
1823 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1824 return
ca30f449 1825
ed5835b4 1826 ie_result = self.run_all_pps('playlist', ie_result)
1827 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1828 return ie_result
1829
a0566bbf 1830 @__handle_extraction_exceptions
1831 def __process_iterable_entry(self, entry, download, extra_info):
1832 return self.process_ie_result(
1833 entry, download=download, extra_info=extra_info)
1834
67134eab
JMF
1835 def _build_format_filter(self, filter_spec):
1836 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1837
1838 OPERATORS = {
1839 '<': operator.lt,
1840 '<=': operator.le,
1841 '>': operator.gt,
1842 '>=': operator.ge,
1843 '=': operator.eq,
1844 '!=': operator.ne,
1845 }
67134eab 1846 operator_rex = re.compile(r'''(?x)\s*
187986a8 1847 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1848 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1849 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1850 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1851 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1852 if m:
1853 try:
1854 comparison_value = int(m.group('value'))
1855 except ValueError:
1856 comparison_value = parse_filesize(m.group('value'))
1857 if comparison_value is None:
1858 comparison_value = parse_filesize(m.group('value') + 'B')
1859 if comparison_value is None:
1860 raise ValueError(
1861 'Invalid value %r in format specification %r' % (
67134eab 1862 m.group('value'), filter_spec))
9ddb6925
S
1863 op = OPERATORS[m.group('op')]
1864
083c9df9 1865 if not m:
9ddb6925
S
1866 STR_OPERATORS = {
1867 '=': operator.eq,
10d33b34
YCH
1868 '^=': lambda attr, value: attr.startswith(value),
1869 '$=': lambda attr, value: attr.endswith(value),
1870 '*=': lambda attr, value: value in attr,
1ce9a3cb 1871 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1872 }
187986a8 1873 str_operator_rex = re.compile(r'''(?x)\s*
1874 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1875 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1876 (?P<quote>["'])?
1877 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1878 (?(quote)(?P=quote))\s*
9ddb6925 1879 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1880 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1881 if m:
1ce9a3cb
LF
1882 if m.group('op') == '~=':
1883 comparison_value = re.compile(m.group('value'))
1884 else:
1885 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1886 str_op = STR_OPERATORS[m.group('op')]
1887 if m.group('negation'):
e118a879 1888 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1889 else:
1890 op = str_op
083c9df9 1891
9ddb6925 1892 if not m:
187986a8 1893 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1894
1895 def _filter(f):
1896 actual_value = f.get(m.group('key'))
1897 if actual_value is None:
1898 return m.group('none_inclusive')
1899 return op(actual_value, comparison_value)
67134eab
JMF
1900 return _filter
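    # Illustrative filter_spec strings accepted above (a sketch; values are examples):
    #   'height<=480'        numeric comparison handled by OPERATORS
    #   'filesize>100M'      the value is parsed with parse_filesize
    #   'ext=mp4'            string comparison handled by STR_OPERATORS
    #   'format_note*=dash'  '*=' means "contains"
    #   'height<=?720'       '?' after the operator also matches formats lacking the field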
1901
9f1a1c36 1902 def _check_formats(self, formats):
1903 for f in formats:
1904 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1905 path = self.get_output_path('temp')
1906 if not self._ensure_dir_exists(f'{path}/'):
1907 continue
1908 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1909 temp_file.close()
1910 try:
1911 success, _ = self.dl(temp_file.name, f, test=True)
1912 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1913 success = False
1914 finally:
1915 if os.path.exists(temp_file.name):
1916 try:
1917 os.remove(temp_file.name)
1918 except OSError:
1919 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1920 if success:
1921 yield f
1922 else:
1923 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1924
0017d9ad 1925 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1926
af0f7428
S
1927 def can_merge():
1928 merger = FFmpegMergerPP(self)
1929 return merger.available and merger.can_merge()
1930
91ebc640 1931 prefer_best = (
b7b04c78 1932 not self.params.get('simulate')
91ebc640 1933 and download
1934 and (
1935 not can_merge()
19807826 1936 or info_dict.get('is_live', False)
de6000d9 1937 or self.outtmpl_dict['default'] == '-'))
53ed7066 1938 compat = (
1939 prefer_best
1940 or self.params.get('allow_multiple_audio_streams', False)
1941 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1942
1943 return (
53ed7066 1944 'best/bestvideo+bestaudio' if prefer_best
1945 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1946 else 'bestvideo+bestaudio/best')
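    # e.g. when downloading to stdout (outtmpl '-') or when ffmpeg cannot merge,
    # prefer_best is True and the chooser above returns 'best/bestvideo+bestaudio';
    # otherwise the default is 'bestvideo*+bestaudio/best' (unless a compat option
    # forces 'bestvideo+bestaudio/best'). Sketch of the logic above.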
0017d9ad 1947
67134eab
JMF
1948 def build_format_selector(self, format_spec):
1949 def syntax_error(note, start):
1950 message = (
1951 'Invalid format specification: '
1952 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1953 return SyntaxError(message)
1954
1955 PICKFIRST = 'PICKFIRST'
1956 MERGE = 'MERGE'
1957 SINGLE = 'SINGLE'
0130afb7 1958 GROUP = 'GROUP'
67134eab
JMF
1959 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1960
91ebc640 1961 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1962 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1963
9f1a1c36 1964 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1965
67134eab
JMF
1966 def _parse_filter(tokens):
1967 filter_parts = []
1968 for type, string, start, _, _ in tokens:
1969 if type == tokenize.OP and string == ']':
1970 return ''.join(filter_parts)
1971 else:
1972 filter_parts.append(string)
1973
232541df 1974 def _remove_unused_ops(tokens):
17cc1534 1975 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1976 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1977 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1978 last_string, last_start, last_end, last_line = None, None, None, None
1979 for type, string, start, end, line in tokens:
1980 if type == tokenize.OP and string == '[':
1981 if last_string:
1982 yield tokenize.NAME, last_string, last_start, last_end, last_line
1983 last_string = None
1984 yield type, string, start, end, line
1985 # everything inside brackets will be handled by _parse_filter
1986 for type, string, start, end, line in tokens:
1987 yield type, string, start, end, line
1988 if type == tokenize.OP and string == ']':
1989 break
1990 elif type == tokenize.OP and string in ALLOWED_OPS:
1991 if last_string:
1992 yield tokenize.NAME, last_string, last_start, last_end, last_line
1993 last_string = None
1994 yield type, string, start, end, line
1995 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1996 if not last_string:
1997 last_string = string
1998 last_start = start
1999 last_end = end
2000 else:
2001 last_string += string
2002 if last_string:
2003 yield tokenize.NAME, last_string, last_start, last_end, last_line
2004
cf2ac6df 2005 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2006 selectors = []
2007 current_selector = None
2008 for type, string, start, _, _ in tokens:
2009 # ENCODING is only defined in python 3.x
2010 if type == getattr(tokenize, 'ENCODING', None):
2011 continue
2012 elif type in [tokenize.NAME, tokenize.NUMBER]:
2013 current_selector = FormatSelector(SINGLE, string, [])
2014 elif type == tokenize.OP:
cf2ac6df
JMF
2015 if string == ')':
2016 if not inside_group:
2017 # ')' will be handled by the parentheses group
2018 tokens.restore_last_token()
67134eab 2019 break
cf2ac6df 2020 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2021 tokens.restore_last_token()
2022 break
cf2ac6df
JMF
2023 elif inside_choice and string == ',':
2024 tokens.restore_last_token()
2025 break
2026 elif string == ',':
0a31a350
JMF
2027 if not current_selector:
2028 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2029 selectors.append(current_selector)
2030 current_selector = None
2031 elif string == '/':
d96d604e
JMF
2032 if not current_selector:
2033 raise syntax_error('"/" must follow a format selector', start)
67134eab 2034 first_choice = current_selector
cf2ac6df 2035 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2036 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2037 elif string == '[':
2038 if not current_selector:
2039 current_selector = FormatSelector(SINGLE, 'best', [])
2040 format_filter = _parse_filter(tokens)
2041 current_selector.filters.append(format_filter)
0130afb7
JMF
2042 elif string == '(':
2043 if current_selector:
2044 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2045 group = _parse_format_selection(tokens, inside_group=True)
2046 current_selector = FormatSelector(GROUP, group, [])
67134eab 2047 elif string == '+':
d03cfdce 2048 if not current_selector:
2049 raise syntax_error('Unexpected "+"', start)
2050 selector_1 = current_selector
2051 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2052 if not selector_2:
2053 raise syntax_error('Expected a selector', start)
2054 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
2055 else:
2056 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2057 elif type == tokenize.ENDMARKER:
2058 break
2059 if current_selector:
2060 selectors.append(current_selector)
2061 return selectors
2062
f8d4ad9a 2063 def _merge(formats_pair):
2064 format_1, format_2 = formats_pair
2065
2066 formats_info = []
2067 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2068 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2069
2070 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2071 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2072 for (i, fmt_info) in enumerate(formats_info):
551f9388 2073 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2074 formats_info.pop(i)
2075 continue
2076 for aud_vid in ['audio', 'video']:
f8d4ad9a 2077 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2078 if get_no_more[aud_vid]:
2079 formats_info.pop(i)
f5510afe 2080 break
f8d4ad9a 2081 get_no_more[aud_vid] = True
2082
2083 if len(formats_info) == 1:
2084 return formats_info[0]
2085
2086 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2087 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2088
2089 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2090 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2091
2092 output_ext = self.params.get('merge_output_format')
2093 if not output_ext:
2094 if the_only_video:
2095 output_ext = the_only_video['ext']
2096 elif the_only_audio and not video_fmts:
2097 output_ext = the_only_audio['ext']
2098 else:
2099 output_ext = 'mkv'
2100
975a0d0d 2101 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2102
f8d4ad9a 2103 new_dict = {
2104 'requested_formats': formats_info,
975a0d0d 2105 'format': '+'.join(filtered('format')),
2106 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2107 'ext': output_ext,
975a0d0d 2108 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2109 'language': '+'.join(orderedSet(filtered('language'))) or None,
2110 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2111 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2112 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2113 }
2114
2115 if the_only_video:
2116 new_dict.update({
2117 'width': the_only_video.get('width'),
2118 'height': the_only_video.get('height'),
2119 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2120 'fps': the_only_video.get('fps'),
49a57e70 2121 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2122 'vcodec': the_only_video.get('vcodec'),
2123 'vbr': the_only_video.get('vbr'),
2124 'stretched_ratio': the_only_video.get('stretched_ratio'),
2125 })
2126
2127 if the_only_audio:
2128 new_dict.update({
2129 'acodec': the_only_audio.get('acodec'),
2130 'abr': the_only_audio.get('abr'),
975a0d0d 2131 'asr': the_only_audio.get('asr'),
f8d4ad9a 2132 })
2133
2134 return new_dict
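            # Illustrative result of _merge for one video-only plus one audio-only
            # format (made-up ids): format_id becomes something like '137+140',
            # ext is taken from the video track (or merge_output_format / 'mkv'),
            # and protocol becomes e.g. 'https+https'.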
2135
e8e73840 2136 def _check_formats(formats):
981052c9 2137 if not check_formats:
2138 yield from formats
b5ac45b1 2139 return
9f1a1c36 2140 yield from self._check_formats(formats)
e8e73840 2141
67134eab 2142 def _build_selector_function(selector):
909d24dd 2143 if isinstance(selector, list): # ,
67134eab
JMF
2144 fs = [_build_selector_function(s) for s in selector]
2145
317f7ab6 2146 def selector_function(ctx):
67134eab 2147 for f in fs:
981052c9 2148 yield from f(ctx)
67134eab 2149 return selector_function
909d24dd 2150
2151 elif selector.type == GROUP: # ()
0130afb7 2152 selector_function = _build_selector_function(selector.selector)
909d24dd 2153
2154 elif selector.type == PICKFIRST: # /
67134eab
JMF
2155 fs = [_build_selector_function(s) for s in selector.selector]
2156
317f7ab6 2157 def selector_function(ctx):
67134eab 2158 for f in fs:
317f7ab6 2159 picked_formats = list(f(ctx))
67134eab
JMF
2160 if picked_formats:
2161 return picked_formats
2162 return []
67134eab 2163
981052c9 2164 elif selector.type == MERGE: # +
2165 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2166
2167 def selector_function(ctx):
adbc4ec4 2168 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2169 yield _merge(pair)
2170
909d24dd 2171 elif selector.type == SINGLE: # atom
598d185d 2172 format_spec = selector.selector or 'best'
909d24dd 2173
f8d4ad9a 2174 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2175 if format_spec == 'all':
2176 def selector_function(ctx):
9222c381 2177 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2178 elif format_spec == 'mergeall':
2179 def selector_function(ctx):
dd2a987d 2180 formats = list(_check_formats(ctx['formats']))
e01d6aa4 2181 if not formats:
2182 return
921b76ca 2183 merged_format = formats[-1]
2184 for f in formats[-2::-1]:
f8d4ad9a 2185 merged_format = _merge((merged_format, f))
2186 yield merged_format
909d24dd 2187
2188 else:
e8e73840 2189 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 2190 mobj = re.match(
2191 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2192 format_spec)
2193 if mobj is not None:
2194 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2195 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2196 format_type = (mobj.group('type') or [None])[0]
2197 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2198 format_modified = mobj.group('mod') is not None
909d24dd 2199
2200 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2201 _filter_f = (
eff63539 2202 (lambda f: f.get('%scodec' % format_type) != 'none')
2203 if format_type and format_modified # bv*, ba*, wv*, wa*
2204 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2205 if format_type # bv, ba, wv, wa
2206 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2207 if not format_modified # b, w
8326b00a 2208 else lambda f: True) # b*, w*
2209 filter_f = lambda f: _filter_f(f) and (
2210 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2211 else:
48ee10ee 2212 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2213 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2214 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2215 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
48ee10ee 2216 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2217 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2218 else:
b5ae35ee 2219 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2220
2221 def selector_function(ctx):
2222 formats = list(ctx['formats'])
909d24dd 2223 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 2224 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 2225 # for extractors with incomplete formats (audio only (soundcloud)
2226 # or video only (imgur)) best/worst will fall back to
2227 # best/worst {video,audio}-only format
e8e73840 2228 matches = formats
981052c9 2229 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2230 try:
e8e73840 2231 yield matches[format_idx - 1]
981052c9 2232 except IndexError:
2233 return
083c9df9 2234
67134eab 2235 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2236
317f7ab6 2237 def final_selector(ctx):
adbc4ec4 2238 ctx_copy = dict(ctx)
67134eab 2239 for _filter in filters:
317f7ab6
S
2240 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2241 return selector_function(ctx_copy)
67134eab 2242 return final_selector
083c9df9 2243
67134eab 2244 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2245 try:
232541df 2246 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2247 except tokenize.TokenError:
2248 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2249
2250 class TokenIterator(object):
2251 def __init__(self, tokens):
2252 self.tokens = tokens
2253 self.counter = 0
2254
2255 def __iter__(self):
2256 return self
2257
2258 def __next__(self):
2259 if self.counter >= len(self.tokens):
2260 raise StopIteration()
2261 value = self.tokens[self.counter]
2262 self.counter += 1
2263 return value
2264
2265 next = __next__
2266
2267 def restore_last_token(self):
2268 self.counter -= 1
2269
2270 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2271 return _build_selector_function(parsed_selector)
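    # Examples of format_spec strings the grammar above accepts (illustrative only):
    #   'best'                        single best pre-merged format
    #   'bv*+ba/b'                    merge best video with best audio, else best
    #   '(mp4,webm)[height<500]'      grouped alternatives sharing one filter
    #   'bestaudio.2'                 second-best audio-only format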
a9c58ad9 2272
e5660ee6 2273 def _calc_headers(self, info_dict):
8b7539d2 2274 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6
JMF
2275
2276 cookies = self._calc_cookies(info_dict)
2277 if cookies:
2278 res['Cookie'] = cookies
2279
0016b84e
S
2280 if 'X-Forwarded-For' not in res:
2281 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2282 if x_forwarded_for_ip:
2283 res['X-Forwarded-For'] = x_forwarded_for_ip
2284
e5660ee6
JMF
2285 return res
2286
2287 def _calc_cookies(self, info_dict):
5c2266df 2288 pr = sanitized_Request(info_dict['url'])
e5660ee6 2289 self.cookiejar.add_cookie_header(pr)
662435f7 2290 return pr.get_header('Cookie')
e5660ee6 2291
9f1a1c36 2292 def _sort_thumbnails(self, thumbnails):
2293 thumbnails.sort(key=lambda t: (
2294 t.get('preference') if t.get('preference') is not None else -1,
2295 t.get('width') if t.get('width') is not None else -1,
2296 t.get('height') if t.get('height') is not None else -1,
2297 t.get('id') if t.get('id') is not None else '',
2298 t.get('url')))
2299
b0249bca 2300 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2301 thumbnails = info_dict.get('thumbnails')
2302 if thumbnails is None:
2303 thumbnail = info_dict.get('thumbnail')
2304 if thumbnail:
2305 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2306 if not thumbnails:
2307 return
2308
2309 def check_thumbnails(thumbnails):
2310 for t in thumbnails:
2311 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2312 try:
2313 self.urlopen(HEADRequest(t['url']))
2314 except network_exceptions as err:
2315 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2316 continue
2317 yield t
2318
2319 self._sort_thumbnails(thumbnails)
2320 for i, t in enumerate(thumbnails):
2321 if t.get('id') is None:
2322 t['id'] = '%d' % i
2323 if t.get('width') and t.get('height'):
2324 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2325 t['url'] = sanitize_url(t['url'])
2326
2327 if self.params.get('check_formats') is True:
282f5709 2328 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2329 else:
2330 info_dict['thumbnails'] = thumbnails
bc516a3f 2331
03f83004
LNO
2332 def _fill_common_fields(self, info_dict, is_video=True):
2333 # TODO: move sanitization here
2334 if is_video:
2335 # playlists are allowed to lack "title"
2336 info_dict['fulltitle'] = info_dict.get('title')
2337 if 'title' not in info_dict:
2338 raise ExtractorError('Missing "title" field in extractor result',
2339 video_id=info_dict['id'], ie=info_dict['extractor'])
2340 elif not info_dict.get('title'):
2341 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2342 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2343
2344 if info_dict.get('duration') is not None:
2345 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2346
2347 for ts_key, date_key in (
2348 ('timestamp', 'upload_date'),
2349 ('release_timestamp', 'release_date'),
2350 ('modified_timestamp', 'modified_date'),
2351 ):
2352 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2353 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2354 # see http://bugs.python.org/issue1646728)
2355 try:
2356 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2357 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2358 except (ValueError, OverflowError, OSError):
2359 pass
2360
2361 live_keys = ('is_live', 'was_live')
2362 live_status = info_dict.get('live_status')
2363 if live_status is None:
2364 for key in live_keys:
2365 if info_dict.get(key) is False:
2366 continue
2367 if info_dict.get(key):
2368 live_status = key
2369 break
2370 if all(info_dict.get(key) is False for key in live_keys):
2371 live_status = 'not_live'
2372 if live_status:
2373 info_dict['live_status'] = live_status
2374 for key in live_keys:
2375 if info_dict.get(key) is None:
2376 info_dict[key] = (live_status == key)
2377
2378 # Auto generate title fields corresponding to the *_number fields when missing
2379 # in order to always have clean titles. This is very common for TV series.
2380 for field in ('chapter', 'season', 'episode'):
2381 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2382 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
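        # e.g. an entry carrying only {'episode_number': 3} gains episode='Episode 3'
        # (and likewise for chapter/season), per the loop above.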
2383
dd82ffea
JMF
2384 def process_video_result(self, info_dict, download=True):
2385 assert info_dict.get('_type', 'video') == 'video'
9c906919 2386 self._num_videos += 1
dd82ffea 2387
bec1fad2 2388 if 'id' not in info_dict:
fc08bdd6 2389 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2390 elif not info_dict.get('id'):
2391 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2392
c9969434
S
2393 def report_force_conversion(field, field_not, conversion):
2394 self.report_warning(
2395 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2396 % (field, field_not, conversion))
2397
2398 def sanitize_string_field(info, string_field):
2399 field = info.get(string_field)
2400 if field is None or isinstance(field, compat_str):
2401 return
2402 report_force_conversion(string_field, 'a string', 'string')
2403 info[string_field] = compat_str(field)
2404
2405 def sanitize_numeric_fields(info):
2406 for numeric_field in self._NUMERIC_FIELDS:
2407 field = info.get(numeric_field)
2408 if field is None or isinstance(field, compat_numeric_types):
2409 continue
2410 report_force_conversion(numeric_field, 'numeric', 'int')
2411 info[numeric_field] = int_or_none(field)
2412
2413 sanitize_string_field(info_dict, 'id')
2414 sanitize_numeric_fields(info_dict)
4c3f8c3f 2415 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2416 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2417
dd82ffea
JMF
2418 if 'playlist' not in info_dict:
2419 # It isn't part of a playlist
2420 info_dict['playlist'] = None
2421 info_dict['playlist_index'] = None
2422
bc516a3f 2423 self._sanitize_thumbnails(info_dict)
d5519808 2424
536a55da 2425 thumbnail = info_dict.get('thumbnail')
bc516a3f 2426 thumbnails = info_dict.get('thumbnails')
536a55da
S
2427 if thumbnail:
2428 info_dict['thumbnail'] = sanitize_url(thumbnail)
2429 elif thumbnails:
d5519808
PH
2430 info_dict['thumbnail'] = thumbnails[-1]['url']
2431
ae30b840 2432 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2433 info_dict['display_id'] = info_dict['id']
2434
03f83004 2435 self._fill_common_fields(info_dict)
33d2fc2f 2436
05108a49
S
2437 for cc_kind in ('subtitles', 'automatic_captions'):
2438 cc = info_dict.get(cc_kind)
2439 if cc:
2440 for _, subtitle in cc.items():
2441 for subtitle_format in subtitle:
2442 if subtitle_format.get('url'):
2443 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2444 if subtitle_format.get('ext') is None:
2445 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2446
2447 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2448 subtitles = info_dict.get('subtitles')
4bba3716 2449
360e1ca5 2450 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2451 info_dict['id'], subtitles, automatic_captions)
a504ced0 2452
dd82ffea
JMF
2453 if info_dict.get('formats') is None:
2454 # There's only one format available
2455 formats = [info_dict]
2456 else:
2457 formats = info_dict['formats']
2458
e0493e90 2459 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2460 if not self.params.get('allow_unplayable_formats'):
2461 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2462
319b6059 2463 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2464 if not get_from_start:
2465 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2466 if info_dict.get('is_live') and formats:
adbc4ec4 2467 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2468 if get_from_start and not formats:
2469 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2470 'If you want to download from the current time, pass --no-live-from-start')
adbc4ec4 2471
db95dc13 2472 if not formats:
1151c407 2473 self.raise_no_formats(info_dict)
db95dc13 2474
73af5cc8
S
2475 def is_wellformed(f):
2476 url = f.get('url')
a5ac0c47 2477 if not url:
73af5cc8
S
2478 self.report_warning(
2479 '"url" field is missing or empty - skipping format, '
2480 'there is an error in extractor')
a5ac0c47
S
2481 return False
2482 if isinstance(url, bytes):
2483 sanitize_string_field(f, 'url')
2484 return True
73af5cc8
S
2485
2486 # Filter out malformed formats for better extraction robustness
2487 formats = list(filter(is_wellformed, formats))
2488
181c7053
S
2489 formats_dict = {}
2490
dd82ffea 2491 # We check that all the formats have the format and format_id fields
db95dc13 2492 for i, format in enumerate(formats):
c9969434
S
2493 sanitize_string_field(format, 'format_id')
2494 sanitize_numeric_fields(format)
dcf77cf1 2495 format['url'] = sanitize_url(format['url'])
e74e3b63 2496 if not format.get('format_id'):
8016c922 2497 format['format_id'] = compat_str(i)
e2effb08
S
2498 else:
2499 # Sanitize format_id from characters used in format selector expression
ec85ded8 2500 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2501 format_id = format['format_id']
2502 if format_id not in formats_dict:
2503 formats_dict[format_id] = []
2504 formats_dict[format_id].append(format)
2505
2506 # Make sure all formats have unique format_id
03b4de72 2507 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2508 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2509 ambiguous_id = len(ambiguous_formats) > 1
2510 for i, format in enumerate(ambiguous_formats):
2511 if ambiguous_id:
181c7053 2512 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2513 if format.get('ext') is None:
2514 format['ext'] = determine_ext(format['url']).lower()
2515 # Ensure there is no conflict between id and ext in format selection
2516 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2517 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2518 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2519
2520 for i, format in enumerate(formats):
8c51aa65 2521 if format.get('format') is None:
6febd1c1 2522 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2523 id=format['format_id'],
2524 res=self.format_resolution(format),
b868936c 2525 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2526 )
6f0be937 2527 if format.get('protocol') is None:
b5559424 2528 format['protocol'] = determine_protocol(format)
239df021 2529 if format.get('resolution') is None:
2530 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2531 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2532 format['dynamic_range'] = 'SDR'
f2fe69c7 2533 if (info_dict.get('duration') and format.get('tbr')
2534 and not format.get('filesize') and not format.get('filesize_approx')):
2535 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
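            # e.g. a 60 s video at tbr=1000 (kbit/s) gets
            # filesize_approx = 60 * 1000 * 128 = 7_680_000 bytes (worked example)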
2536
e5660ee6
JMF
2537 # Add HTTP headers, so that external programs can use them from the
2538 # json output
2539 full_format_info = info_dict.copy()
2540 full_format_info.update(format)
2541 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2542 # Remove private housekeeping stuff
2543 if '__x_forwarded_for_ip' in info_dict:
2544 del info_dict['__x_forwarded_for_ip']
dd82ffea 2545
9f1a1c36 2546 if self.params.get('check_formats') is True:
282f5709 2547 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2548
88acdbc2 2549 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2550 # only set the 'formats' field if the original info_dict lists them
2551 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2552 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2553 # which can't be exported to json
b3d9ef88 2554 info_dict['formats'] = formats
4ec82a72 2555
2556 info_dict, _ = self.pre_process(info_dict)
2557
6db9c4d5 2558 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2559 return info_dict
2560
2561 self.post_extract(info_dict)
2562 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2563
093a1710 2564 # The pre-processors may have modified the formats
2565 formats = info_dict.get('formats', [info_dict])
2566
fa9f30b8 2567 list_only = self.params.get('simulate') is None and (
2568 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2569 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2570 if self.params.get('list_thumbnails'):
2571 self.list_thumbnails(info_dict)
b7b04c78 2572 if self.params.get('listsubtitles'):
2573 if 'automatic_captions' in info_dict:
2574 self.list_subtitles(
2575 info_dict['id'], automatic_captions, 'automatic captions')
2576 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2577 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2578 self.list_formats(info_dict)
169dbde9 2579 if list_only:
b7b04c78 2580 # Without this printing, -F --print-json will not work
169dbde9 2581 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2582 return
2583
187986a8 2584 format_selector = self.format_selector
2585 if format_selector is None:
0017d9ad 2586 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2587 self.write_debug('Default format spec: %s' % req_format)
187986a8 2588 format_selector = self.build_format_selector(req_format)
317f7ab6 2589
fa9f30b8 2590 while True:
2591 if interactive_format_selection:
2592 req_format = input(
2593 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2594 try:
2595 format_selector = self.build_format_selector(req_format)
2596 except SyntaxError as err:
2597 self.report_error(err, tb=False, is_error=False)
2598 continue
2599
2600 # While in format selection we may need to have access to the original
2601 # format set in order to calculate some metrics or do some processing.
2602 # For now we need to be able to guess whether original formats provided
2603 # by extractor are incomplete or not (i.e. whether extractor provides only
2604 # video-only or audio-only formats) for proper formats selection for
2605 # extractors with such incomplete formats (see
2606 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2607 # Since formats may be filtered during format selection and may not match
2608 # the original formats, the results may be incorrect. Thus original formats
2609 # or pre-calculated metrics should be passed to format selection routines
2610 # as well.
2611 # We will pass a context object containing all necessary additional data
2612 # instead of just formats.
2613 # This fixes incorrect format selection issue (see
2614 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2615 incomplete_formats = (
2616 # All formats are video-only or
2617 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2618 # all formats are audio-only
2619 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2620
2621 ctx = {
2622 'formats': formats,
2623 'incomplete_formats': incomplete_formats,
2624 }
2625
2626 formats_to_download = list(format_selector(ctx))
2627 if interactive_format_selection and not formats_to_download:
2628 self.report_error('Requested format is not available', tb=False, is_error=False)
2629 continue
2630 break
317f7ab6 2631
dd82ffea 2632 if not formats_to_download:
b7da73eb 2633 if not self.params.get('ignore_no_formats_error'):
1151c407 2634 raise ExtractorError('Requested format is not available', expected=True,
2635 video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2636 self.report_warning('Requested format is not available')
2637 # Process what we can, even without any available formats.
2638 formats_to_download = [{}]
a13e6848 2639
b62fa6d7 2640 best_format = formats_to_download[-1]
2641 if download:
2642 if best_format:
2643 self.to_screen(
2644 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2645 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2646 max_downloads_reached = False
f46e2f9d 2647 for i, fmt in enumerate(formats_to_download):
09b49e1f 2648 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2649 new_info.update(fmt)
a13e6848 2650 try:
2651 self.process_info(new_info)
2652 except MaxDownloadsReached:
2653 max_downloads_reached = True
f46e2f9d 2654 # Remove copied info
2655 for key, val in tuple(new_info.items()):
2656 if info_dict.get(key) == val:
2657 new_info.pop(key)
a13e6848 2658 if max_downloads_reached:
2659 break
ebed8b37 2660
9e907ebd 2661 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
a13e6848 2662 assert write_archive.issubset({True, False, 'ignore'})
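# Record the archive entry only if at least one downloaded format requested it and
# none explicitly opted out; 'ignore' entries neither force nor block the write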
2663 if True in write_archive and False not in write_archive:
2664 self.record_download_archive(info_dict)
be72c624 2665
2666 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2667 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2668 if max_downloads_reached:
2669 raise MaxDownloadsReached()
ebed8b37 2670
49a57e70 2671 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2672 info_dict.update(best_format)
dd82ffea
JMF
2673 return info_dict
2674
98c70d6f 2675 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2676 """Select the requested subtitles and their format"""
98c70d6f
JMF
2677 available_subs = {}
2678 if normal_subtitles and self.params.get('writesubtitles'):
2679 available_subs.update(normal_subtitles)
2680 if automatic_captions and self.params.get('writeautomaticsub'):
2681 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2682 if lang not in available_subs:
2683 available_subs[lang] = cap_info
2684
4d171848
JMF
2685 if (not self.params.get('writesubtitles')
2686 and not self.params.get('writeautomaticsub')
2687 or not available_subs):
2688 return None
a504ced0 2689
c32b0aab 2690 all_sub_langs = available_subs.keys()
a504ced0 2691 if self.params.get('allsubtitles', False):
c32b0aab 2692 requested_langs = all_sub_langs
2693 elif self.params.get('subtitleslangs', False):
77c4a9ef 2694 # A list is used so that the order of languages will be the same as
2695 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
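# A sketch of the loop below: each entry is treated as an anchored regex, 'all' expands
# to every available language, and a leading '-' discards matches (so a hypothetical
# ['all', '-live_chat'] would keep everything except live_chat)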
2696 requested_langs = []
2697 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2698 discard = lang_re[0] == '-'
c32b0aab 2699 if discard:
77c4a9ef 2700 lang_re = lang_re[1:]
3aa91540 2701 if lang_re == 'all':
2702 if discard:
2703 requested_langs = []
2704 else:
2705 requested_langs.extend(all_sub_langs)
2706 continue
77c4a9ef 2707 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2708 if discard:
2709 for lang in current_langs:
77c4a9ef 2710 while lang in requested_langs:
2711 requested_langs.remove(lang)
c32b0aab 2712 else:
77c4a9ef 2713 requested_langs.extend(current_langs)
2714 requested_langs = orderedSet(requested_langs)
c32b0aab 2715 elif 'en' in available_subs:
2716 requested_langs = ['en']
a504ced0 2717 else:
c32b0aab 2718 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2719 if requested_langs:
2720 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2721
2722 formats_query = self.params.get('subtitlesformat', 'best')
2723 formats_preference = formats_query.split('/') if formats_query else []
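# e.g. a hypothetical subtitlesformat of 'srt/best' would prefer srt when available and
# otherwise fall back to the last listed (best) format in the loop below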
2724 subs = {}
2725 for lang in requested_langs:
2726 formats = available_subs.get(lang)
2727 if formats is None:
2728 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2729 continue
a504ced0
JMF
2730 for ext in formats_preference:
2731 if ext == 'best':
2732 f = formats[-1]
2733 break
2734 matches = list(filter(lambda f: f['ext'] == ext, formats))
2735 if matches:
2736 f = matches[-1]
2737 break
2738 else:
2739 f = formats[-1]
2740 self.report_warning(
2741 'No subtitle format found matching "%s" for language %s, '
2742 'using %s' % (formats_query, lang, f['ext']))
2743 subs[lang] = f
2744 return subs
2745
bb66c247 2746 def _forceprint(self, key, info_dict):
2747 if info_dict is None:
2748 return
2749 info_copy = info_dict.copy()
2750 info_copy['formats_table'] = self.render_formats_table(info_dict)
2751 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2752 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2753 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2754
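# format_tmpl (below) is a small shorthand expander: a bare field name like 'title'
# becomes '%(title)s', a trailing '=' (e.g. 'title=') becomes 'title = %(title)r',
# and anything else is assumed to already be a full output template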
2755 def format_tmpl(tmpl):
2756 mobj = re.match(r'\w+(=?)$', tmpl)
2757 if mobj and mobj.group(1):
2758 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2759 elif mobj:
2760 return f'%({tmpl})s'
2761 return tmpl
8130779d 2762
bb66c247 2763 for tmpl in self.params['forceprint'].get(key, []):
2764 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2765
2766 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2767 filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2768 tmpl = format_tmpl(tmpl)
2769 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2770 if self._ensure_dir_exists(filename):
2771 with io.open(filename, 'a', encoding='utf-8') as f:
2772 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2773
d06daf23 2774 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2775 def print_mandatory(field, actual_field=None):
2776 if actual_field is None:
2777 actual_field = field
d06daf23 2778 if (self.params.get('force%s' % field, False)
53c18592 2779 and (not incomplete or info_dict.get(actual_field) is not None)):
2780 self.to_stdout(info_dict[actual_field])
d06daf23
S
2781
2782 def print_optional(field):
2783 if (self.params.get('force%s' % field, False)
2784 and info_dict.get(field) is not None):
2785 self.to_stdout(info_dict[field])
2786
53c18592 2787 info_dict = info_dict.copy()
2788 if filename is not None:
2789 info_dict['filename'] = filename
2790 if info_dict.get('requested_formats') is not None:
2791 # For RTMP URLs, also include the playpath
2792 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2793 elif info_dict.get('url'):
53c18592 2794 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2795
bb66c247 2796 if (self.params.get('forcejson')
2797 or self.params['forceprint'].get('video')
2798 or self.params['print_to_file'].get('video')):
2b8a2973 2799 self.post_extract(info_dict)
bb66c247 2800 self._forceprint('video', info_dict)
53c18592 2801
d06daf23
S
2802 print_mandatory('title')
2803 print_mandatory('id')
53c18592 2804 print_mandatory('url', 'urls')
d06daf23
S
2805 print_optional('thumbnail')
2806 print_optional('description')
53c18592 2807 print_optional('filename')
b868936c 2808 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2809 self.to_stdout(formatSeconds(info_dict['duration']))
2810 print_mandatory('format')
53c18592 2811
2b8a2973 2812 if self.params.get('forcejson'):
6e84b215 2813 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2814
e8e73840 2815 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2816 if not info.get('url'):
1151c407 2817 self.raise_no_formats(info, True)
e8e73840 2818
2819 if test:
2820 verbose = self.params.get('verbose')
2821 params = {
2822 'test': True,
a169858f 2823 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2824 'verbose': verbose,
2825 'noprogress': not verbose,
2826 'nopart': True,
2827 'skip_unavailable_fragments': False,
2828 'keep_fragments': False,
2829 'overwrites': True,
2830 '_no_ytdl_file': True,
2831 }
2832 else:
2833 params = self.params
96fccc10 2834 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2835 if not test:
2836 for ph in self._progress_hooks:
2837 fd.add_progress_hook(ph)
42676437
M
2838 urls = '", "'.join(
2839 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2840 for f in info.get('requested_formats', []) or [info])
18e674b4 2841 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2842
adbc4ec4
THD
2843 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2844 # But it may contain objects that are not deep-copyable
2845 new_info = self._copy_infodict(info)
e8e73840 2846 if new_info.get('http_headers') is None:
2847 new_info['http_headers'] = self._calc_headers(new_info)
2848 return fd.download(name, new_info, subtitle)
2849
e04938ab 2850 def existing_file(self, filepaths, *, default_overwrite=True):
2851 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2852 if existing_files and not self.params.get('overwrites', default_overwrite):
2853 return existing_files[0]
2854
2855 for file in existing_files:
2856 self.report_file_delete(file)
2857 os.remove(file)
2858 return None
2859
8222d8de 2860 def process_info(self, info_dict):
09b49e1f 2861 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2862
2863 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2864 original_infodict = info_dict
fd288278 2865
4513a41a 2866 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2867 info_dict['format'] = info_dict['ext']
2868
09b49e1f 2869 # This is mostly just for backward compatibility of process_info
2870 # As a side-effect, this allows for format-specific filters
c77495e3 2871 if self._match_entry(info_dict) is not None:
9e907ebd 2872 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2873 return
2874
09b49e1f 2875 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2876 self.post_extract(info_dict)
0c14d66a 2877 self._num_downloads += 1
8222d8de 2878
dcf64d43 2879 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2880 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2881 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2882 files_to_move = {}
8222d8de
JMF
2883
2884 # Forced printings
4513a41a 2885 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2886
b7b04c78 2887 if self.params.get('simulate'):
9e907ebd 2888 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
8222d8de
JMF
2889 return
2890
de6000d9 2891 if full_filename is None:
8222d8de 2892 return
e92caff5 2893 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2894 return
e92caff5 2895 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2896 return
2897
80c03fa9 2898 if self._write_description('video', info_dict,
2899 self.prepare_filename(info_dict, 'description')) is None:
2900 return
2901
2902 sub_files = self._write_subtitles(info_dict, temp_filename)
2903 if sub_files is None:
2904 return
2905 files_to_move.update(dict(sub_files))
2906
2907 thumb_files = self._write_thumbnails(
2908 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2909 if thumb_files is None:
2910 return
2911 files_to_move.update(dict(thumb_files))
8222d8de 2912
80c03fa9 2913 infofn = self.prepare_filename(info_dict, 'infojson')
2914 _infojson_written = self._write_info_json('video', info_dict, infofn)
2915 if _infojson_written:
dac5df5a 2916 info_dict['infojson_filename'] = infofn
e75bb0d6 2917 # For backward compatibility, even though it was a private field
80c03fa9 2918 info_dict['__infojson_filename'] = infofn
2919 elif _infojson_written is None:
2920 return
2921
2922 # Note: Annotations are deprecated
2923 annofn = None
1fb07d10 2924 if self.params.get('writeannotations', False):
de6000d9 2925 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2926 if annofn:
e92caff5 2927 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2928 return
0c3d0f51 2929 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2930 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2931 elif not info_dict.get('annotations'):
2932 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2933 else:
2934 try:
6febd1c1 2935 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2936 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2937 annofile.write(info_dict['annotations'])
2938 except (KeyError, TypeError):
6febd1c1 2939 self.report_warning('There are no annotations to write.')
7b6fefc9 2940 except (OSError, IOError):
6febd1c1 2941 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2942 return
1fb07d10 2943
732044af 2944 # Write internet shortcut files
08438d2c 2945 def _write_link_file(link_type):
60f3e995 2946 url = try_get(info_dict['webpage_url'], iri_to_uri)
2947 if not url:
2948 self.report_warning(
2949 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2950 return True
08438d2c 2951 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2952 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2953 return False
10e3742e 2954 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2955 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2956 return True
2957 try:
2958 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2959 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2960 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2961 template_vars = {'url': url}
08438d2c 2962 if link_type == 'desktop':
2963 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2964 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2965 except (OSError, IOError):
2966 self.report_error(f'Cannot write internet shortcut {linkfn}')
2967 return False
732044af 2968 return True
2969
08438d2c 2970 write_links = {
2971 'url': self.params.get('writeurllink'),
2972 'webloc': self.params.get('writewebloclink'),
2973 'desktop': self.params.get('writedesktoplink'),
2974 }
2975 if self.params.get('writelink'):
2976 link_type = ('webloc' if sys.platform == 'darwin'
2977 else 'desktop' if sys.platform.startswith('linux')
2978 else 'url')
2979 write_links[link_type] = True
2980
2981 if any(should_write and not _write_link_file(link_type)
2982 for link_type, should_write in write_links.items()):
2983 return
732044af 2984
f46e2f9d 2985 def replace_info_dict(new_info):
2986 nonlocal info_dict
2987 if new_info == info_dict:
2988 return
2989 info_dict.clear()
2990 info_dict.update(new_info)
2991
56d868db 2992 try:
f46e2f9d 2993 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2994 replace_info_dict(new_info)
56d868db 2995 except PostProcessingError as err:
2996 self.report_error('Preprocessing: %s' % str(err))
2997 return
2998
a13e6848 2999 if self.params.get('skip_download'):
56d868db 3000 info_dict['filepath'] = temp_filename
3001 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3002 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3003 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3004 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3005 else:
3006 # Download
b868936c 3007 info_dict.setdefault('__postprocessors', [])
4340deca 3008 try:
0202b52a 3009
e04938ab 3010 def existing_video_file(*filepaths):
6b591b29 3011 ext = info_dict.get('ext')
e04938ab 3012 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3013 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3014 default_overwrite=False)
3015 if file:
3016 info_dict['ext'] = os.path.splitext(file)[1][1:]
3017 return file
0202b52a 3018
3019 success = True
4340deca 3020 if info_dict.get('requested_formats') is not None:
81cd954a
S
3021
3022 def compatible_formats(formats):
d03cfdce 3023 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3024 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3025 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3026 if len(video_formats) > 2 or len(audio_formats) > 2:
3027 return False
3028
81cd954a 3029 # Check extension
d03cfdce 3030 exts = set(format.get('ext') for format in formats)
3031 COMPATIBLE_EXTS = (
3032 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3033 set(('webm',)),
3034 )
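# All requested formats must fall inside a single group above for the merge container
# to be considered compatible; otherwise the caller remuxes into mkv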
3035 for ext_sets in COMPATIBLE_EXTS:
3036 if ext_sets.issuperset(exts):
3037 return True
81cd954a
S
3038 # TODO: Check acodec/vcodec
3039 return False
3040
3041 requested_formats = info_dict['requested_formats']
0202b52a 3042 old_ext = info_dict['ext']
4e3b637d 3043 if self.params.get('merge_output_format') is None:
3044 if not compatible_formats(requested_formats):
3045 info_dict['ext'] = 'mkv'
3046 self.report_warning(
3047 'Requested formats are incompatible for merge and will be merged into mkv')
3048 if (info_dict['ext'] == 'webm'
3049 and info_dict.get('thumbnails')
3050 # check with type instead of pp_key, __name__, or isinstance
3051 # since we don't want any custom PPs to trigger this
3052 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3053 info_dict['ext'] = 'mkv'
3054 self.report_warning(
3055 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3056 new_ext = info_dict['ext']
0202b52a 3057
124bc071 3058 def correct_ext(filename, ext=new_ext):
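# '-' (stdout) is passed through; otherwise the extension is swapped for `ext` when the
# existing one matches the old or new ext (illustrative: 'video.webm' -> 'video.mkv')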
96fccc10 3059 if filename == '-':
3060 return filename
0202b52a 3061 filename_real_ext = os.path.splitext(filename)[1][1:]
3062 filename_wo_ext = (
3063 os.path.splitext(filename)[0]
124bc071 3064 if filename_real_ext in (old_ext, new_ext)
0202b52a 3065 else filename)
124bc071 3066 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 3067
38c6902b 3068 # Ensure filename always has a correct extension for successful merge
0202b52a 3069 full_filename = correct_ext(full_filename)
3070 temp_filename = correct_ext(temp_filename)
e04938ab 3071 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3072 info_dict['__real_download'] = False
18e674b4 3073
adbc4ec4
THD
3074 downloaded = []
3075 merger = FFmpegMergerPP(self)
3076
3077 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 3078 if dl_filename is not None:
6c7274ec 3079 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3080 elif fd:
3081 for f in requested_formats if fd != FFmpegFD else []:
3082 f['filepath'] = fname = prepend_extension(
3083 correct_ext(temp_filename, info_dict['ext']),
3084 'f%s' % f['format_id'], info_dict['ext'])
3085 downloaded.append(fname)
dbf5416a 3086 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3087 success, real_download = self.dl(temp_filename, info_dict)
3088 info_dict['__real_download'] = real_download
18e674b4 3089 else:
18e674b4 3090 if self.params.get('allow_unplayable_formats'):
3091 self.report_warning(
3092 'You have requested merging of multiple formats '
3093 'while also allowing unplayable formats to be downloaded. '
3094 'The formats won\'t be merged to prevent data corruption.')
3095 elif not merger.available:
e8969bda 3096 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3097 if not self.params.get('ignoreerrors'):
3098 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3099 return
3100 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3101
96fccc10 3102 if temp_filename == '-':
adbc4ec4 3103 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3104 else 'but the formats are incompatible for simultaneous download' if merger.available
3105 else 'but ffmpeg is not installed')
3106 self.report_warning(
3107 f'You have requested downloading multiple formats to stdout {reason}. '
3108 'The formats will be streamed one after the other')
3109 fname = temp_filename
dbf5416a 3110 for f in requested_formats:
3111 new_info = dict(info_dict)
3112 del new_info['requested_formats']
3113 new_info.update(f)
96fccc10 3114 if temp_filename != '-':
124bc071 3115 fname = prepend_extension(
3116 correct_ext(temp_filename, new_info['ext']),
3117 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3118 if not self._ensure_dir_exists(fname):
3119 return
a21e0ab1 3120 f['filepath'] = fname
96fccc10 3121 downloaded.append(fname)
dbf5416a 3122 partial_success, real_download = self.dl(fname, new_info)
3123 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3124 success = success and partial_success
adbc4ec4
THD
3125
3126 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3127 info_dict['__postprocessors'].append(merger)
3128 info_dict['__files_to_merge'] = downloaded
3129 # Even if there were no downloads, the merge itself only happens now
3130 info_dict['__real_download'] = True
3131 else:
3132 for file in downloaded:
3133 files_to_move[file] = None
4340deca
P
3134 else:
3135 # Just a single file
e04938ab 3136 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3137 if dl_filename is None or dl_filename == temp_filename:
3138 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3139 # So we should try to resume the download
e8e73840 3140 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3141 info_dict['__real_download'] = real_download
6c7274ec 3142 else:
3143 self.report_file_already_downloaded(dl_filename)
0202b52a 3144
0202b52a 3145 dl_filename = dl_filename or temp_filename
c571435f 3146 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3147
3158150c 3148 except network_exceptions as err:
7960b056 3149 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
3150 return
3151 except (OSError, IOError) as err:
3152 raise UnavailableVideoError(err)
3153 except (ContentTooShortError, ) as err:
3154 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3155 return
8222d8de 3156
de6000d9 3157 if success and full_filename != '-':
f17f8651 3158
fd7cfb64 3159 def fixup():
3160 do_fixup = True
3161 fixup_policy = self.params.get('fixup')
3162 vid = info_dict['id']
3163
3164 if fixup_policy in ('ignore', 'never'):
3165 return
3166 elif fixup_policy == 'warn':
3167 do_fixup = False
f89b3e2d 3168 elif fixup_policy != 'force':
3169 assert fixup_policy in ('detect_or_warn', None)
3170 if not info_dict.get('__real_download'):
3171 do_fixup = False
fd7cfb64 3172
3173 def ffmpeg_fixup(cndn, msg, cls):
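# If the condition holds, queue the given FFmpegFixup* postprocessor; when fixups are
# disabled (or ffmpeg is missing) only a warning is emitted instead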
3174 if not cndn:
3175 return
3176 if not do_fixup:
3177 self.report_warning(f'{vid}: {msg}')
3178 return
3179 pp = cls(self)
3180 if pp.available:
3181 info_dict['__postprocessors'].append(pp)
3182 else:
3183 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3184
3185 stretched_ratio = info_dict.get('stretched_ratio')
3186 ffmpeg_fixup(
3187 stretched_ratio not in (1, None),
3188 f'Non-uniform pixel ratio {stretched_ratio}',
3189 FFmpegFixupStretchedPP)
3190
3191 ffmpeg_fixup(
3192 (info_dict.get('requested_formats') is None
3193 and info_dict.get('container') == 'm4a_dash'
3194 and info_dict.get('ext') == 'm4a'),
3195 'writing DASH m4a. Only some players support this container',
3196 FFmpegFixupM4aPP)
3197
993191c0 3198 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3199 downloader = downloader.__name__ if downloader else None
adbc4ec4
THD
3200
3201 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3202 ffmpeg_fixup(downloader == 'HlsFD',
3203 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3204 FFmpegFixupM3u8PP)
3205 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3206 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3207
e04b003e 3208 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3209 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3210
3211 fixup()
8222d8de 3212 try:
f46e2f9d 3213 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3214 except PostProcessingError as err:
3215 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3216 return
ab8e5e51
AM
3217 try:
3218 for ph in self._post_hooks:
23c1a667 3219 ph(info_dict['filepath'])
ab8e5e51
AM
3220 except Exception as err:
3221 self.report_error('post hooks: %s' % str(err))
3222 return
9e907ebd 3223 info_dict['__write_download_archive'] = True
2d30509f 3224
a13e6848 3225 if self.params.get('force_write_download_archive'):
9e907ebd 3226 info_dict['__write_download_archive'] = True
a13e6848 3227
3228 # Make sure the info_dict was modified in-place
f46e2f9d 3229 assert info_dict is original_infodict
a13e6848 3230
c3e6ffba 3231 max_downloads = self.params.get('max_downloads')
3232 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3233 raise MaxDownloadsReached()
8222d8de 3234
aa9369a2 3235 def __download_wrapper(self, func):
3236 @functools.wraps(func)
3237 def wrapper(*args, **kwargs):
3238 try:
3239 res = func(*args, **kwargs)
3240 except UnavailableVideoError as e:
3241 self.report_error(e)
b222c271 3242 except MaxDownloadsReached as e:
aa9369a2 3243 self.to_screen(f'[info] {e}')
3244 raise
b222c271 3245 except DownloadCancelled as e:
3246 self.to_screen(f'[info] {e}')
3247 if not self.params.get('break_per_url'):
3248 raise
aa9369a2 3249 else:
3250 if self.params.get('dump_single_json', False):
3251 self.post_extract(res)
3252 self.to_stdout(json.dumps(self.sanitize_info(res)))
3253 return wrapper
3254
8222d8de
JMF
3255 def download(self, url_list):
3256 """Download a given list of URLs."""
aa9369a2 3257 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3258 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3259 if (len(url_list) > 1
3260 and outtmpl != '-'
3261 and '%' not in outtmpl
3262 and self.params.get('max_downloads') != 1):
acd69589 3263 raise SameFileError(outtmpl)
8222d8de
JMF
3264
3265 for url in url_list:
aa9369a2 3266 self.__download_wrapper(self.extract_info)(
3267 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3268
3269 return self._download_retcode
3270
1dcc4c0c 3271 def download_with_info_file(self, info_filename):
31bd3925
JMF
3272 with contextlib.closing(fileinput.FileInput(
3273 [info_filename], mode='r',
3274 openhook=fileinput.hook_encoded('utf-8'))) as f:
3275 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3276 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3277 try:
aa9369a2 3278 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3279 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3280 if not isinstance(e, EntryNotInPlaylist):
3281 self.to_stderr('\r')
d4943898
JMF
3282 webpage_url = info.get('webpage_url')
3283 if webpage_url is not None:
aa9369a2 3284 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3285 return self.download([webpage_url])
3286 else:
3287 raise
3288 return self._download_retcode
1dcc4c0c 3289
cb202fd2 3290 @staticmethod
8012d892 3291 def sanitize_info(info_dict, remove_private_keys=False):
3292 ''' Sanitize the infodict for converting to json '''
3ad56b42 3293 if info_dict is None:
3294 return info_dict
6e84b215 3295 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3296 info_dict.setdefault('_type', 'video')
09b49e1f 3297
8012d892 3298 if remove_private_keys:
09b49e1f 3299 reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
f46e2f9d 3300 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3301 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3302 }
ae8f99e6 3303 else:
09b49e1f 3304 reject = lambda k, v: False
adbc4ec4
THD
3305
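# filter_fn below recursively keeps JSON-serializable values (dicts, lists, strings,
# numbers, bools, None) and falls back to repr() for anything else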
3306 def filter_fn(obj):
3307 if isinstance(obj, dict):
3308 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3309 elif isinstance(obj, (list, tuple, set, LazyList)):
3310 return list(map(filter_fn, obj))
3311 elif obj is None or isinstance(obj, (str, int, float, bool)):
3312 return obj
3313 else:
3314 return repr(obj)
3315
5226731e 3316 return filter_fn(info_dict)
cb202fd2 3317
8012d892 3318 @staticmethod
3319 def filter_requested_info(info_dict, actually_filter=True):
3320 ''' Alias of sanitize_info for backward compatibility '''
3321 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3322
ed5835b4 3323 @staticmethod
3324 def post_extract(info_dict):
3325 def actual_post_extract(info_dict):
3326 if info_dict.get('_type') in ('playlist', 'multi_video'):
3327 for video_dict in info_dict.get('entries', {}):
3328 actual_post_extract(video_dict or {})
3329 return
3330
09b49e1f 3331 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3332 info_dict.update(post_extractor())
ed5835b4 3333
3334 actual_post_extract(info_dict or {})
3335
dcf64d43 3336 def run_pp(self, pp, infodict):
5bfa4862 3337 files_to_delete = []
dcf64d43 3338 if '__files_to_move' not in infodict:
3339 infodict['__files_to_move'] = {}
b1940459 3340 try:
3341 files_to_delete, infodict = pp.run(infodict)
3342 except PostProcessingError as e:
3343 # Must be True and not 'only_download'
3344 if self.params.get('ignoreerrors') is True:
3345 self.report_error(e)
3346 return infodict
3347 raise
3348
5bfa4862 3349 if not files_to_delete:
dcf64d43 3350 return infodict
5bfa4862 3351 if self.params.get('keepvideo', False):
3352 for f in files_to_delete:
dcf64d43 3353 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3354 else:
3355 for old_filename in set(files_to_delete):
3356 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3357 try:
3358 os.remove(encodeFilename(old_filename))
3359 except (IOError, OSError):
3360 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3361 if old_filename in infodict['__files_to_move']:
3362 del infodict['__files_to_move'][old_filename]
3363 return infodict
5bfa4862 3364
ed5835b4 3365 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3366 self._forceprint(key, info)
ed5835b4 3367 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3368 info = self.run_pp(pp, info)
ed5835b4 3369 return info
277d6ff5 3370
56d868db 3371 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3372 info = dict(ie_info)
56d868db 3373 info['__files_to_move'] = files_to_move or {}
ed5835b4 3374 info = self.run_all_pps(key, info)
56d868db 3375 return info, info.pop('__files_to_move', None)
5bfa4862 3376
f46e2f9d 3377 def post_process(self, filename, info, files_to_move=None):
8222d8de 3378 """Run all the postprocessors on the given file."""
8222d8de 3379 info['filepath'] = filename
dcf64d43 3380 info['__files_to_move'] = files_to_move or {}
ed5835b4 3381 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3382 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3383 del info['__files_to_move']
ed5835b4 3384 return self.run_all_pps('after_move', info)
c1c9a79c 3385
5db07df6 3386 def _make_archive_id(self, info_dict):
e9fef7ee
S
3387 video_id = info_dict.get('id')
3388 if not video_id:
3389 return
5db07df6
PH
3390 # Future-proof against any change in case
3391 # and for backwards compatibility with prior versions
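# The resulting id has the form '<extractor key, lowercased> <video id>'
# (e.g. a hypothetical 'youtube abc123'), matching the lines written to the archive file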
e9fef7ee 3392 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3393 if extractor is None:
1211bb6d
S
3394 url = str_or_none(info_dict.get('url'))
3395 if not url:
3396 return
e9fef7ee 3397 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3398 for ie_key, ie in self._ies.items():
1211bb6d 3399 if ie.suitable(url):
8b7491c8 3400 extractor = ie_key
e9fef7ee
S
3401 break
3402 else:
3403 return
d0757229 3404 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3405
3406 def in_download_archive(self, info_dict):
3407 fn = self.params.get('download_archive')
3408 if fn is None:
3409 return False
3410
3411 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3412 if not vid_id:
7012b23c 3413 return False # Incomplete video information
5db07df6 3414
a45e8619 3415 return vid_id in self.archive
c1c9a79c
PH
3416
3417 def record_download_archive(self, info_dict):
3418 fn = self.params.get('download_archive')
3419 if fn is None:
3420 return
5db07df6
PH
3421 vid_id = self._make_archive_id(info_dict)
3422 assert vid_id
a13e6848 3423 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3424 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3425 archive_file.write(vid_id + '\n')
a45e8619 3426 self.archive.add(vid_id)
dd82ffea 3427
8c51aa65 3428 @staticmethod
8abeeb94 3429 def format_resolution(format, default='unknown'):
9359f3d4 3430 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3431 return 'audio only'
f49d89ee
PH
3432 if format.get('resolution') is not None:
3433 return format['resolution']
35615307 3434 if format.get('width') and format.get('height'):
ff51ed58 3435 return '%dx%d' % (format['width'], format['height'])
35615307 3436 elif format.get('height'):
ff51ed58 3437 return '%sp' % format['height']
35615307 3438 elif format.get('width'):
ff51ed58 3439 return '%dx?' % format['width']
3440 return default
8c51aa65 3441
8130779d 3442 def _list_format_headers(self, *headers):
3443 if self.params.get('listformats_table', True) is not False:
3444 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3445 return headers
3446
c57f7757
PH
3447 def _format_note(self, fdict):
3448 res = ''
3449 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3450 res += '(unsupported)'
32f90364
PH
3451 if fdict.get('language'):
3452 if res:
3453 res += ' '
f304da8a 3454 res += '[%s]' % fdict['language']
c57f7757 3455 if fdict.get('format_note') is not None:
f304da8a 3456 if res:
3457 res += ' '
3458 res += fdict['format_note']
c57f7757 3459 if fdict.get('tbr') is not None:
f304da8a 3460 if res:
3461 res += ', '
3462 res += '%4dk' % fdict['tbr']
c57f7757
PH
3463 if fdict.get('container') is not None:
3464 if res:
3465 res += ', '
3466 res += '%s container' % fdict['container']
3089bc74
S
3467 if (fdict.get('vcodec') is not None
3468 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3469 if res:
3470 res += ', '
3471 res += fdict['vcodec']
91c7271a 3472 if fdict.get('vbr') is not None:
c57f7757
PH
3473 res += '@'
3474 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3475 res += 'video@'
3476 if fdict.get('vbr') is not None:
3477 res += '%4dk' % fdict['vbr']
fbb21cf5 3478 if fdict.get('fps') is not None:
5d583bdf
S
3479 if res:
3480 res += ', '
3481 res += '%sfps' % fdict['fps']
c57f7757
PH
3482 if fdict.get('acodec') is not None:
3483 if res:
3484 res += ', '
3485 if fdict['acodec'] == 'none':
3486 res += 'video only'
3487 else:
3488 res += '%-5s' % fdict['acodec']
3489 elif fdict.get('abr') is not None:
3490 if res:
3491 res += ', '
3492 res += 'audio'
3493 if fdict.get('abr') is not None:
3494 res += '@%3dk' % fdict['abr']
3495 if fdict.get('asr') is not None:
3496 res += ' (%5dHz)' % fdict['asr']
3497 if fdict.get('filesize') is not None:
3498 if res:
3499 res += ', '
3500 res += format_bytes(fdict['filesize'])
9732d77e
PH
3501 elif fdict.get('filesize_approx') is not None:
3502 if res:
3503 res += ', '
3504 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3505 return res
91c7271a 3506
8130779d 3507 def render_formats_table(self, info_dict):
b69fd25c 3508 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3509 return None
b69fd25c 3510
94badb25 3511 formats = info_dict.get('formats', [info_dict])
8130779d 3512 if self.params.get('listformats_table', True) is False:
76d321f6 3513 table = [
3514 [
3515 format_field(f, 'format_id'),
3516 format_field(f, 'ext'),
3517 self.format_resolution(f),
8130779d 3518 self._format_note(f)
3519 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3520 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3521
3522 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3523 table = [
3524 [
3525 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3526 format_field(f, 'ext'),
3527 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3528 format_field(f, 'fps', '\t%d'),
3529 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3530 delim,
3531 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3532 format_field(f, 'tbr', '\t%dk'),
3533 shorten_protocol_name(f.get('protocol', '')),
3534 delim,
3535 format_field(f, 'vcodec', default='unknown').replace(
3536 'none', 'images' if f.get('acodec') == 'none'
3537 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3538 format_field(f, 'vbr', '\t%dk'),
3539 format_field(f, 'acodec', default='unknown').replace(
3540 'none', '' if f.get('vcodec') == 'none'
3541 else self._format_screen('video only', self.Styles.SUPPRESS)),
3542 format_field(f, 'abr', '\t%dk'),
3543 format_field(f, 'asr', '\t%dHz'),
3544 join_nonempty(
3545 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3546 format_field(f, 'language', '[%s]'),
3547 join_nonempty(format_field(f, 'format_note'),
3548 format_field(f, 'container', ignore=(None, f.get('ext'))),
3549 delim=', '),
3550 delim=' '),
3551 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3552 header_line = self._list_format_headers(
3553 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3554 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3555
3556 return render_table(
3557 header_line, table, hide_empty=True,
3558 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3559
3560 def render_thumbnails_table(self, info_dict):
88f23a18 3561 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3562 if not thumbnails:
8130779d 3563 return None
3564 return render_table(
ec11a9f4 3565 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3566 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3567
8130779d 3568 def render_subtitles_table(self, video_id, subtitles):
2412044c 3569 def _row(lang, formats):
49c258e1 3570 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3571 if len(set(names)) == 1:
7aee40c1 3572 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3573 return [lang, ', '.join(names), ', '.join(exts)]
3574
8130779d 3575 if not subtitles:
3576 return None
3577 return render_table(
ec11a9f4 3578 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3579 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3580 hide_empty=True)
3581
3582 def __list_table(self, video_id, name, func, *args):
3583 table = func(*args)
3584 if not table:
3585 self.to_screen(f'{video_id} has no {name}')
3586 return
3587 self.to_screen(f'[info] Available {name} for {video_id}:')
3588 self.to_stdout(table)
3589
3590 def list_formats(self, info_dict):
3591 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3592
3593 def list_thumbnails(self, info_dict):
3594 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3595
3596 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3597 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3598
dca08720
PH
3599 def urlopen(self, req):
3600 """ Start an HTTP download """
82d8a8b6 3601 if isinstance(req, compat_basestring):
67dda517 3602 req = sanitized_Request(req)
19a41fc6 3603 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3604
3605 def print_debug_header(self):
3606 if not self.params.get('verbose'):
3607 return
49a57e70 3608
3609 def get_encoding(stream):
2a938746 3610 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3611 if not supports_terminal_sequences(stream):
e3c7d495 3612 from .compat import WINDOWS_VT_MODE
3613 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3614 return ret
3615
3616 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3617 locale.getpreferredencoding(),
3618 sys.getfilesystemencoding(),
cf4f42cb 3619 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
49a57e70 3620 self.get_encoding())
883d4b1e 3621
3622 logger = self.params.get('logger')
3623 if logger:
3624 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3625 write_debug(encoding_str)
3626 else:
96565c7e 3627 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3628 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3629
4c88ff87 3630 source = detect_variant()
36eaf303 3631 write_debug(join_nonempty(
3632 'yt-dlp version', __version__,
3633 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3634 '' if source == 'unknown' else f'({source})',
3635 delim=' '))
6e21fdd2 3636 if not _LAZY_LOADER:
3637 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3638 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3639 else:
49a57e70 3640 write_debug('Lazy loading extractors is disabled')
3ae5e797 3641 if plugin_extractors or plugin_postprocessors:
49a57e70 3642 write_debug('Plugins: %s' % [
3ae5e797 3643 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3644 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3645 if self.params.get('compat_opts'):
49a57e70 3646 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3647
3648 if source == 'source':
dca08720 3649 try:
36eaf303 3650 sp = Popen(
3651 ['git', 'rev-parse', '--short', 'HEAD'],
3652 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3653 cwd=os.path.dirname(os.path.abspath(__file__)))
3654 out, err = sp.communicate_or_kill()
3655 out = out.decode().strip()
3656 if re.match('[0-9a-f]+', out):
3657 write_debug('Git HEAD: %s' % out)
70a1165b 3658 except Exception:
36eaf303 3659 try:
3660 sys.exc_clear()
3661 except Exception:
3662 pass
b300cda4
S
3663
3664 def python_implementation():
3665 impl_name = platform.python_implementation()
3666 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3667 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3668 return impl_name
3669
49a57e70 3670 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3671 platform.python_version(),
3672 python_implementation(),
3673 platform.architecture()[0],
b300cda4 3674 platform_name()))
d28b5171 3675
8913ef74 3676 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3677 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3678 if ffmpeg_features:
a4211baf 3679 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
8913ef74 3680
4c83c967 3681 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3682 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3683 exe_str = ', '.join(
2831b468 3684 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3685 ) or 'none'
49a57e70 3686 write_debug('exe versions: %s' % exe_str)
dca08720 3687
2831b468 3688 from .downloader.websocket import has_websockets
3689 from .postprocessor.embedthumbnail import has_mutagen
f59f5ef8 3690 from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
2831b468 3691
c586f9e8 3692 lib_str = join_nonempty(
4390d5ec 3693 compat_brotli and compat_brotli.__name__,
d5820461 3694 has_certifi and 'certifi',
edf65256 3695 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
f59f5ef8 3696 SECRETSTORAGE_AVAILABLE and 'secretstorage',
2831b468 3697 has_mutagen and 'mutagen',
3698 SQLITE_AVAILABLE and 'sqlite',
c586f9e8 3699 has_websockets and 'websockets',
3700 delim=', ') or 'none'
49a57e70 3701 write_debug('Optional libraries: %s' % lib_str)
2831b468 3702
dca08720
PH
3703 proxy_map = {}
3704 for handler in self._opener.handlers:
3705 if hasattr(handler, 'proxies'):
3706 proxy_map.update(handler.proxies)
49a57e70 3707 write_debug(f'Proxy map: {proxy_map}')
dca08720 3708
49a57e70 3709 # Not implemented
3710 if False and self.params.get('call_home'):
58b1f00d 3711 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3712 write_debug('Public IP address: %s' % ipaddr)
58b1f00d
PH
3713 latest_version = self.urlopen(
3714 'https://yt-dl.org/latest/version').read().decode('utf-8')
3715 if version_tuple(latest_version) > version_tuple(__version__):
3716 self.report_warning(
3717 'You are using an outdated version (newest version: %s)! '
3718 'See https://yt-dl.org/update if you need help updating.' %
3719 latest_version)
3720
e344693b 3721 def _setup_opener(self):
6ad14cab 3722 timeout_val = self.params.get('socket_timeout')
17bddf3e 3723 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3724
982ee69a 3725 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3726 opts_cookiefile = self.params.get('cookiefile')
3727 opts_proxy = self.params.get('proxy')
3728
982ee69a 3729 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3730
6a3f4c3f 3731 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3732 if opts_proxy is not None:
3733 if opts_proxy == '':
3734 proxies = {}
3735 else:
3736 proxies = {'http': opts_proxy, 'https': opts_proxy}
3737 else:
3738 proxies = compat_urllib_request.getproxies()
067aa17e 3739 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3740 if 'http' in proxies and 'https' not in proxies:
3741 proxies['https'] = proxies['http']
91410c9b 3742 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3743
3744 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3745 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3746 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3747 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3748 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3749
3750 # When passing our own FileHandler instance, build_opener won't add the
3751 # default FileHandler and allows us to disable the file protocol, which
3752 # can be used for malicious purposes (see
067aa17e 3753 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3754 file_handler = compat_urllib_request.FileHandler()
3755
3756 def file_open(*args, **kwargs):
7a5c1cfe 3757 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3758 file_handler.file_open = file_open
3759
3760 opener = compat_urllib_request.build_opener(
fca6dba8 3761 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3762
dca08720
PH
3763 # Delete the default user-agent header, which would otherwise apply in
3764 # cases where our custom HTTP handler doesn't come into play
067aa17e 3765 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3766 opener.addheaders = []
3767 self._opener = opener
62fec3b2
PH
3768
3769 def encode(self, s):
3770 if isinstance(s, bytes):
3771 return s # Already encoded
3772
3773 try:
3774 return s.encode(self.get_encoding())
3775 except UnicodeEncodeError as err:
3776 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3777 raise
3778
3779 def get_encoding(self):
3780 encoding = self.params.get('encoding')
3781 if encoding is None:
3782 encoding = preferredencoding()
3783 return encoding
ec82d85a 3784
e08a85d8 3785 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
80c03fa9 3786 ''' Write infojson and returns True = written, False = skip, None = error '''
e08a85d8 3787 if overwrite is None:
3788 overwrite = self.params.get('overwrites', True)
80c03fa9 3789 if not self.params.get('writeinfojson'):
3790 return False
3791 elif not infofn:
3792 self.write_debug(f'Skipping writing {label} infojson')
3793 return False
3794 elif not self._ensure_dir_exists(infofn):
3795 return None
e08a85d8 3796 elif not overwrite and os.path.exists(infofn):
80c03fa9 3797 self.to_screen(f'[info] {label.title()} metadata is already present')
3798 else:
3799 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3800 try:
3801 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3802 except (OSError, IOError):
3803 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3804 return None
3805 return True
3806
3807 def _write_description(self, label, ie_result, descfn):
3808 ''' Write description and returns True = written, False = skip, None = error '''
3809 if not self.params.get('writedescription'):
3810 return False
3811 elif not descfn:
3812 self.write_debug(f'Skipping writing {label} description')
3813 return False
3814 elif not self._ensure_dir_exists(descfn):
3815 return None
3816 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3817 self.to_screen(f'[info] {label.title()} description is already present')
3818 elif ie_result.get('description') is None:
3819 self.report_warning(f'There\'s no {label} description to write')
3820 return False
3821 else:
3822 try:
3823 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3824 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3825 descfile.write(ie_result['description'])
3826 except (OSError, IOError):
3827 self.report_error(f'Cannot write {label} description file {descfn}')
3828 return None
3829 return True
3830
3831 def _write_subtitles(self, info_dict, filename):
3832 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3833 ret = []
3834 subtitles = info_dict.get('requested_subtitles')
3835 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3836 # subtitle download errors are already managed as troubles in the relevant IE,
3837 # that way it will silently go on when used with an IE that doesn't support subtitles
3838 return ret
3839
3840 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3841 if not sub_filename_base:
3842 self.to_screen('[info] Skipping writing video subtitles')
3843 return ret
3844 for sub_lang, sub_info in subtitles.items():
3845 sub_format = sub_info['ext']
3846 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3847 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3848 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3849 if existing_sub:
80c03fa9 3850 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3851 sub_info['filepath'] = existing_sub
3852 ret.append((existing_sub, sub_filename_final))
80c03fa9 3853 continue
3854
3855 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3856 if sub_info.get('data') is not None:
3857 try:
3858 # Use newline='' to prevent conversion of newline characters
3859 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3860 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3861 subfile.write(sub_info['data'])
3862 sub_info['filepath'] = sub_filename
3863 ret.append((sub_filename, sub_filename_final))
3864 continue
3865 except (OSError, IOError):
3866 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3867 return None
3868
3869 try:
3870 sub_copy = sub_info.copy()
3871 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3872 self.dl(sub_filename, sub_copy, subtitle=True)
3873 sub_info['filepath'] = sub_filename
3874 ret.append((sub_filename, sub_filename_final))
6020e05d 3875 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3876 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3877 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3878 if not self.params.get('ignoreerrors'):
3879 self.report_error(msg)
3880 raise DownloadError(msg)
3881 self.report_warning(msg)
519804a9 3882 return ret
80c03fa9 3883
3884 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3885 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3886 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3887 thumbnails, ret = [], []
6c4fd172 3888 if write_all or self.params.get('writethumbnail', False):
0202b52a 3889 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3890 multiple = write_all and len(thumbnails) > 1
ec82d85a 3891
80c03fa9 3892 if thumb_filename_base is None:
3893 thumb_filename_base = filename
3894 if thumbnails and not thumb_filename_base:
3895 self.write_debug(f'Skipping writing {label} thumbnail')
3896 return ret
3897
dd0228ce 3898 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3899 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3900 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3901 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3902 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3903
e04938ab 3904 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3905 if existing_thumb:
aa9369a2 3906 self.to_screen('[info] %s is already present' % (
3907 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3908 t['filepath'] = existing_thumb
3909 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3910 else:
80c03fa9 3911 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3912 try:
297e9952 3913 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3914 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3915 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3916 shutil.copyfileobj(uf, thumbf)
80c03fa9 3917 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3918 t['filepath'] = thumb_filename
3158150c 3919 except network_exceptions as err:
dd0228ce 3920 thumbnails.pop(idx)
80c03fa9 3921 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3922 if ret and not write_all:
3923 break
0202b52a 3924 return ret