[yt-dlp.git] / yt_dlp / YoutubeDL.py
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
b5ae35ee 11import functools
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de 19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
524e2e4f 28import unicodedata
8222d8de 29
ec11a9f4 30from enum import Enum
961ea474 31from string import ascii_letters
32
8c25f81b 33from .compat import (
82d8a8b6 34 compat_basestring,
4390d5ec 35 compat_brotli,
003c69a8 36 compat_get_terminal_size,
4f026faf 37 compat_kwargs,
d0d9ade4 38 compat_numeric_types,
e9c0cdd3 39 compat_os_name,
edf65256 40 compat_pycrypto_AES,
7d1eb38a 41 compat_shlex_quote,
ce02ed60 42 compat_str,
67134eab 43 compat_tokenize_tokenize,
ce02ed60 44 compat_urllib_error,
45 compat_urllib_request,
8b172c2e 46 compat_urllib_request_DataHandler,
819e0531 47 windows_enable_vt_mode,
8c25f81b 48)
982ee69a 49from .cookies import load_cookies
8c25f81b 50from .utils import (
eedb7ba5 51 age_restricted,
52 args_to_str,
ce02ed60 53 ContentTooShortError,
54 date_from_str,
55 DateRange,
acd69589 56 DEFAULT_OUTTMPL,
ce02ed60 57 determine_ext,
b5559424 58 determine_protocol,
48f79687 59 DownloadCancelled,
ce02ed60 60 DownloadError,
c0384f22 61 encode_compat_str,
ce02ed60 62 encodeFilename,
498f5606 63 EntryNotInPlaylist,
a06916d9 64 error_to_compat_str,
8b0d7497 65 ExistingVideoReached,
590bc6f6 66 expand_path,
ce02ed60 67 ExtractorError,
e29663c6 68 float_or_none,
02dbf93f 69 format_bytes,
76d321f6 70 format_field,
e0fd9573 71 format_decimal_suffix,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
0bb322b9 74 get_domain,
d5820461 75 has_certifi,
b0249bca 76 HEADRequest,
d37707bd 77 InAdvancePagedList,
c9969434 78 int_or_none,
732044af 79 iri_to_uri,
773f291d 80 ISO3166Utils,
34921b43 81 join_nonempty,
56a8fb4f 82 LazyList,
08438d2c 83 LINK_TEMPLATES,
ce02ed60 84 locked_file,
0202b52a 85 make_dir,
dca08720 86 make_HTTPS_handler,
ce02ed60 87 MaxDownloadsReached,
8b7539d2 88 merge_headers,
3158150c 89 network_exceptions,
ec11a9f4 90 number_of_digits,
cd6fc19e 91 orderedSet,
a06916d9 92 OUTTMPL_TYPES,
b7ab0590 93 PagedList,
083c9df9 94 parse_filesize,
91410c9b 95 PerRequestProxyHandler,
dca08720 96 platform_name,
d3c93ec2 97 Popen,
1e43a6f7 98 POSTPROCESS_WHEN,
eedb7ba5 99 PostProcessingError,
ce02ed60 100 preferredencoding,
eedb7ba5 101 prepend_extension,
f2ebc5c7 102 ReExtractInfo,
51fb4995 103 register_socks_protocols,
a06916d9 104 RejectedVideoReached,
3efb96a6 105 remove_terminal_sequences,
cfb56d1a 106 render_table,
eedb7ba5 107 replace_extension,
ce02ed60 108 SameFileError,
109 sanitize_filename,
1bb5c511 110 sanitize_path,
dcf77cf1 111 sanitize_url,
67dda517 112 sanitized_Request,
e5660ee6 113 std_headers,
819e0531 114 STR_FORMAT_RE_TMPL,
115 STR_FORMAT_TYPES,
1211bb6d 116 str_or_none,
e29663c6 117 strftime_or_none,
ce02ed60 118 subtitles_filename,
819e0531 119 supports_terminal_sequences,
f2ebc5c7 120 timetuple_from_msec,
732044af 121 to_high_limit_path,
324ad820 122 traverse_obj,
6033d980 123 try_get,
ce02ed60 124 UnavailableVideoError,
29eb5174 125 url_basename,
7d1eb38a 126 variadic,
58b1f00d 127 version_tuple,
ce02ed60 128 write_json_file,
129 write_string,
6a3f4c3f 130 YoutubeDLCookieProcessor,
dca08720 131 YoutubeDLHandler,
fca6dba8 132 YoutubeDLRedirectHandler,
ce02ed60 133)
a0e07d31 134from .cache import Cache
ec11a9f4 135from .minicurses import format_text
52a8a1e1 136from .extractor import (
137 gen_extractor_classes,
138 get_info_extractor,
139 _LAZY_LOADER,
3ae5e797 140 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 141)
4c54b89e 142from .extractor.openload import PhantomJSwrapper
52a8a1e1 143from .downloader import (
dbf5416a 144 FFmpegFD,
52a8a1e1 145 get_suitable_downloader,
146 shorten_protocol_name
147)
4c83c967 148from .downloader.rtmp import rtmpdump_version
4f026faf 149from .postprocessor import (
e36d50c5 150 get_postprocessor,
4e3b637d 151 EmbedThumbnailPP,
adbc4ec4 152 FFmpegFixupDuplicateMoovPP,
e36d50c5 153 FFmpegFixupDurationPP,
f17f8651 154 FFmpegFixupM3u8PP,
62cd676c 155 FFmpegFixupM4aPP,
6271f1ca 156 FFmpegFixupStretchedPP,
e36d50c5 157 FFmpegFixupTimestampPP,
4f026faf 158 FFmpegMergerPP,
159 FFmpegPostProcessor,
0202b52a 160 MoveFilesAfterDownloadPP,
3ae5e797 161 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 162)
4c88ff87 163from .update import detect_variant
36eaf303 164from .version import __version__, RELEASE_GIT_HEAD
8222d8de 165
e9c0cdd3 166if compat_os_name == 'nt':
167 import ctypes
168
2459b6e1 169
8222d8de 170class YoutubeDL(object):
171 """YoutubeDL class.
172
173 YoutubeDL objects are the ones responsible for downloading the
174 actual video file and writing it to disk if the user has requested
175 it, among some other tasks. In most cases there should be one per
176 program. Given a video URL, the downloader does not know how to
177 extract all the needed information (a task that InfoExtractors do),
178 so it has to pass the URL to one of them.
179
180 For this, YoutubeDL objects have a method that allows
181 InfoExtractors to be registered in a given order. When it is passed
182 a URL, the YoutubeDL object hands it to the first InfoExtractor it
183 finds that reports being able to handle it. The InfoExtractor extracts
184 all the information about the video or videos the URL refers to, and
185 YoutubeDL processes the extracted information, possibly using a File
186 Downloader to download the video.
187
188 YoutubeDL objects accept a lot of parameters. In order not to saturate
189 the object constructor with arguments, it receives a dictionary of
190 options instead. These options are available through the params
191 attribute for the InfoExtractors to use. The YoutubeDL also
192 registers itself as the downloader in charge of the InfoExtractors
193 that are added to it, so this is a "mutual registration".
194
195 Available options:
196
197 username: Username for authentication purposes.
198 password: Password for authentication purposes.
180940e0 199 videopassword: Password for accessing a video.
1da50aa3 200 ap_mso: Adobe Pass multiple-system operator identifier.
201 ap_username: Multiple-system operator account username.
202 ap_password: Multiple-system operator account password.
8222d8de 203 usenetrc: Use netrc for authentication instead.
204 verbose: Print additional info to stdout.
205 quiet: Do not print messages to stdout.
ad8915b7 206 no_warnings: Do not print out anything for warnings.
bb66c247 207 forceprint: A dict with keys WHEN mapped to a list of templates to
208 print to stdout. The allowed keys are video or any of the
209 items in utils.POSTPROCESS_WHEN.
ca30f449 210 For compatibility, a single list is also accepted
bb66c247 211 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
212 a list of tuples with (template, filename)
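                   For illustration only (templates and filename are made up),
                   the two options could look like:
                       'forceprint': {'video': ['%(title)s by %(uploader)s']},
                       'print_to_file': {'video': [('%(title)s', 'titles.txt')]}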
53c18592 213 forceurl: Force printing final URL. (Deprecated)
214 forcetitle: Force printing title. (Deprecated)
215 forceid: Force printing ID. (Deprecated)
216 forcethumbnail: Force printing thumbnail URL. (Deprecated)
217 forcedescription: Force printing description. (Deprecated)
218 forcefilename: Force printing final filename. (Deprecated)
219 forceduration: Force printing duration. (Deprecated)
8694c600 220 forcejson: Force printing info_dict as JSON.
63e0be34 221 dump_single_json: Force printing the info_dict of the whole playlist
222 (or video) as a single JSON line.
c25228e5 223 force_write_download_archive: Force writing download archive regardless
224 of 'skip_download' or 'simulate'.
b7b04c78 225 simulate: Do not download the video files. If unset (or None),
226 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 227 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 228 You can also pass a function. The function takes 'ctx' as
229 argument and returns the formats to download.
230 See "build_format_selector" for an implementation
63ad4d43 231 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 232 ignore_no_formats_error: Ignore "No video formats" error. Useful for
233 extracting metadata even if the video is not actually
234 available for download (experimental)
0930b11f 235 format_sort: A list of fields by which to sort the video formats.
236 See "Sorting Formats" for more details.
c25228e5 237 format_sort_force: Force the given format_sort. see "Sorting Formats"
238 for more details.
08d30158 239 prefer_free_formats: Whether to prefer video formats with free containers
240 over non-free ones of same quality.
c25228e5 241 allow_multiple_video_streams: Allow multiple video streams to be merged
242 into a single file
243 allow_multiple_audio_streams: Allow multiple audio streams to be merged
244 into a single file
0ba692ac 245 check_formats Whether to test if the formats are downloadable.
9f1a1c36 246 Can be True (check all), False (check none),
247 'selected' (check selected formats),
0ba692ac 248 or None (check only if requested by extractor)
4524baf0 249 paths: Dictionary of output paths. The allowed keys are 'home'
250 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 251 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 252 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 253 For compatibility with youtube-dl, a single string can also be used
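                   For example (paths and template values are illustrative):
                       'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
                       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'}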
a820dc72 254 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
255 restrictfilenames: Do not allow "&" and spaces in file names
256 trim_file_name: Limit length of filename (extension excluded)
4524baf0 257 windowsfilenames: Force the filenames to be windows compatible
b1940459 258 ignoreerrors: Do not stop on download/postprocessing errors.
259 Can be 'only_download' to ignore only download errors.
260 Default is 'only_download' for CLI, but False for API
26e2805c 261 skip_playlist_after_errors: Number of allowed failures until the rest of
262 the playlist is skipped
d22dec74 263 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 264 overwrites: Overwrite all video and metadata files if True,
265 overwrite only non-video files if None
266 and don't overwrite any file if False
34488702 267 For compatibility with youtube-dl,
268 "nooverwrites" may also be used instead
8222d8de 269 playliststart: Playlist item to start at.
270 playlistend: Playlist item to end at.
c14e88f0 271 playlist_items: Specific indices of playlist to download.
ff815fe6 272 playlistreverse: Download playlist items in reverse order.
75822ca7 273 playlistrandom: Download playlist items in random order.
8222d8de 274 matchtitle: Download only matching titles.
275 rejecttitle: Reject downloads for matching titles.
8bf9319e 276 logger: Log messages to a logging.Logger instance.
8222d8de 277 logtostderr: Log messages to stderr instead of stdout.
819e0531 278 consoletitle: Display progress in console window's titlebar.
8222d8de 279 writedescription: Write the video description to a .description file
280 writeinfojson: Write the video description to a .info.json file
75d43ca0 281 clean_infojson: Remove private fields from the infojson
34488702 282 getcomments: Extract video comments. This will not be written to disk
06167fbb 283 unless writeinfojson is also given
1fb07d10 284 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 285 writethumbnail: Write the thumbnail image to a file
c25228e5 286 allow_playlist_files: Whether to write playlists' description, infojson etc
287 also to disk when using the 'write*' options
ec82d85a 288 write_all_thumbnails: Write all thumbnail formats to files
732044af 289 writelink: Write an internet shortcut file, depending on the
290 current platform (.url/.webloc/.desktop)
291 writeurllink: Write a Windows internet shortcut file (.url)
292 writewebloclink: Write a macOS internet shortcut file (.webloc)
293 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 294 writesubtitles: Write the video subtitles to a file
741dd8ea 295 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 296 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 297 Downloads all the subtitles of the video
0b7f3118 298 (requires writesubtitles or writeautomaticsub)
8222d8de 299 listsubtitles: Lists all available subtitles for the video
a504ced0 300 subtitlesformat: The format code for subtitles
c32b0aab 301 subtitleslangs: List of languages of the subtitles to download (can be regex).
302 The list may contain "all" to refer to all the available
303 subtitles. The language can be prefixed with a "-" to
304 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de 305 keepvideo: Keep the video file after post-processing
306 daterange: A DateRange object, download only if the upload_date is in the range.
307 skip_download: Skip the actual download of the video file
c35f9e72 308 cachedir: Location of the cache files in the filesystem.
a0e07d31 309 False to disable filesystem cache.
47192f92 310 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899 311 age_limit: An integer representing the user's age in years.
312 Unsuitable videos for the given age are skipped.
5fe18bdb 313 min_views: An integer representing the minimum view count the video
314 must have in order to not be skipped.
315 Videos without view count information are always
316 downloaded. None for no limit.
317 max_views: An integer representing the maximum view count.
318 Videos that are more popular than that are not
319 downloaded.
320 Videos without view count information are always
321 downloaded. None for no limit.
322 download_archive: File name of a file where all downloads are recorded.
c1c9a79c 323 Videos already present in the file are not downloaded
324 again.
8a51f564 325 break_on_existing: Stop the download process after attempting to download a
326 file that is in the archive.
327 break_on_reject: Stop the download process when encountering a video that
328 has been filtered out.
b222c271 329 break_per_url: Whether break_on_reject and break_on_existing
330 should act on each input URL as opposed to for the entire queue
8a51f564 331 cookiefile: File name where cookies should be read from and dumped to
f59f5ef8 332 cookiesfrombrowser: A tuple containing the name of the browser, the profile
333 name/path from where cookies are loaded, and the name of the
334 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
f81c62a6 335 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
336 support RFC 5746 secure renegotiation
f59f5ef8 337 nocheckcertificate: Do not verify SSL certificates
7e8c0af0 338 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
339 At the moment, this is only supported by YouTube.
8b7539d2 340 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 341 proxy: URL of the proxy server to use
38cce791 342 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 343 on geo-restricted sites.
e344693b 344 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b 345 bidi_workaround: Work around buggy terminals without bidirectional text
346 support, using fribidi
a0ddb8a2 347 debug_printtraffic:Print out sent and received HTTP traffic
91f071af 348 include_ads: Download ads as well (deprecated)
04b4d394 349 default_search: Prepend this string if an input url is not valid.
350 'auto' for elaborate guessing
62fec3b2 351 encoding: Use this encoding instead of the system-specified.
e8ee972c 352 extract_flat: Do not resolve URLs, return the immediate result.
057a5206 353 Pass in 'in_playlist' to only show this behavior for
354 playlist items.
f2ebc5c7 355 wait_for_video: If given, wait for scheduled streams to become available.
356 The value should be a tuple containing the range
357 (min_secs, max_secs) to wait between retries
4f026faf 358 postprocessors: A list of dictionaries, each with an entry
71b640cc 359 * key: The name of the postprocessor. See
7a5c1cfe 360 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 361 * when: When to run the postprocessor. Allowed values are
362 the entries of utils.POSTPROCESS_WHEN
56d868db 363 Assumed to be 'post_process' if not given
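                   A sketch of one entry (any other keys are passed to the
                   postprocessor itself; 'preferredcodec' applies to FFmpegExtractAudio):
                       'postprocessors': [{
                           'key': 'FFmpegExtractAudio',
                           'preferredcodec': 'mp3',
                           'when': 'post_process',
                       }]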
b5ae35ee 364 post_hooks: Deprecated - Register a custom postprocessor instead
365 A list of functions that get called as the final step
ab8e5e51 366 for each video file, after all postprocessors have been
367 called. The filename will be passed as the only argument.
71b640cc 368 progress_hooks: A list of functions that get called on download
369 progress, with a dictionary with the entries
5cda4eda 370 * status: One of "downloading", "error", or "finished".
ee69b99a 371 Check this first and ignore unknown values.
3ba7740d 372 * info_dict: The extracted info_dict
71b640cc 373
5cda4eda 374 If status is one of "downloading", or "finished", the
ee69b99a 375 following properties may also be present:
376 * filename: The final filename (always present)
5cda4eda 377 * tmpfilename: The filename we're currently writing to
71b640cc 378 * downloaded_bytes: Bytes on disk
379 * total_bytes: Size of the whole file, None if unknown
5cda4eda 380 * total_bytes_estimate: Guess of the eventual file size,
381 None if unavailable.
382 * elapsed: The number of seconds since download started.
71b640cc 383 * eta: The estimated time in seconds, None if unknown
384 * speed: The download speed in bytes/second, None if
385 unknown
5cda4eda 386 * fragment_index: The counter of the currently
387 downloaded video fragment.
388 * fragment_count: The number of fragments (= individual
389 files that will be merged)
71b640cc 390
391 Progress hooks are guaranteed to be called at least once
392 (with status "finished") if the download is successful.
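                   A minimal sketch of a hook ('my_hook' is an illustrative name):
                       def my_hook(d):
                           if d['status'] == 'finished':
                               print('Done downloading', d['filename'])
                       params = {'progress_hooks': [my_hook]}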
819e0531 393 postprocessor_hooks: A list of functions that get called on postprocessing
394 progress, with a dictionary with the entries
395 * status: One of "started", "processing", or "finished".
396 Check this first and ignore unknown values.
397 * postprocessor: Name of the postprocessor
398 * info_dict: The extracted info_dict
399
400 Progress hooks are guaranteed to be called at least twice
401 (with status "started" and "finished") if the processing is successful.
45598f15 402 merge_output_format: Extension to use when merging formats.
6b591b29 403 final_ext: Expected final extension; used to detect when the file was
59a7a13e 404 already downloaded and converted
6271f1ca 405 fixup: Automatically correct known faults of the file.
406 One of:
407 - "never": do nothing
408 - "warn": only emit a warning
409 - "detect_or_warn": check whether we can do anything
62cd676c 410 about it, warn otherwise (default)
504f20dd 411 source_address: Client-side IP address to bind to.
6ec6cb4e 412 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 413 yt-dlp servers for debugging. (BROKEN)
1cf376f5 414 sleep_interval_requests: Number of seconds to sleep between requests
415 during extraction
7aa589a5 416 sleep_interval: Number of seconds to sleep before each download when
417 used alone or a lower bound of a range for randomized
418 sleep before each download (minimum possible number
419 of seconds to sleep) when used along with
420 max_sleep_interval.
421 max_sleep_interval:Upper bound of a range for randomized sleep before each
422 download (maximum possible number of seconds to sleep).
423 Must only be used along with sleep_interval.
424 Actual sleep time will be a random float from range
425 [sleep_interval; max_sleep_interval].
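                   Eg (illustrative values): sleep_interval=5 together with
                   max_sleep_interval=30 sleeps a random 5-30 seconds per download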
1cf376f5 426 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a 427 listformats: Print an overview of available video formats and exit.
428 list_thumbnails: Print a table of all thumbnails and exit.
347de493 429 match_filter: A function that gets called with the info_dict of
430 every video.
431 If it returns a message, the video is ignored.
432 If it returns None, the video is downloaded.
433 match_filter_func in utils.py is one example for this.
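                   A minimal sketch (function name and threshold are illustrative):
                       def longer_than_a_minute(info_dict):
                           if (info_dict.get('duration') or 0) < 60:
                               return 'Video too short'  # skipped; message is logged
                           return None  # None means the video is downloaded
                       params = {'match_filter': longer_than_a_minute}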
7e5db8c9 434 no_color: Do not emit color codes in output.
0a840f58 435 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 436 HTTP header
0a840f58 437 geo_bypass_country:
773f291d 438 Two-letter ISO 3166-2 country code that will be used for
439 explicit geographic restriction bypassing via faking
504f20dd 440 X-Forwarded-For HTTP header
5f95927a 441 geo_bypass_ip_block:
442 IP range in CIDR notation that will be used similarly to
504f20dd 443 geo_bypass_country
71b640cc 444
85729c51 445 The following options determine which downloader is picked:
52a8a1e1 446 external_downloader: A dictionary of protocol keys and the executable of the
447 external downloader to use for it. The allowed protocols
448 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
449 Set the value to 'native' to use the native downloader
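                   Eg (illustrative): {'default': 'aria2c', 'm3u8': 'native'}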
450 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
451 or {'m3u8': 'ffmpeg'} instead.
452 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a 453 if True, otherwise use ffmpeg/avconv if False, otherwise
454 use downloader suggested by extractor if None.
53ed7066 455 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 456 The following options do not work when used through the API:
b5ae35ee 457 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 458 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 459 Refer to __init__.py for their implementation
819e0531 460 progress_template: Dictionary of templates for progress outputs.
461 Allowed keys are 'download', 'postprocess',
462 'download-title' (console title) and 'postprocess-title'.
463 The template is mapped on a dictionary with keys 'progress' and 'info'
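                   A sketch, assuming dotted field access into the 'progress' and
                   'info' dicts (values are illustrative):
                       'progress_template': {
                           'download': '%(info.id)s: %(progress.downloaded_bytes)s bytes',
                           'download-title': '%(info.title)s',
                       }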
fe7e0c98 464
8222d8de 465 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 466 the downloader (see yt_dlp/downloader/common.py):
51d9739f 467 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 468 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
469 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 470 external_downloader_args, concurrent_fragment_downloads.
76b1bd67 471
472 The following options are used by the post processors:
d4a24f40 473 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 474 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117 475 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
476 to the binary or its containing directory.
43820c03 477 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 478 and a list of additional command-line arguments for the
479 postprocessor/executable. The dict can also have "PP+EXE" keys
480 which are used when the given exe is used by the given PP.
481 Use 'default' as the name for arguments to passed to all PP
482 For compatibility with youtube-dl, a single list of args
483 can also be used
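                   Eg (illustrative): {'ffmpeg': ['-threads', '2'], 'default': []}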
e409895f 484
485 The following options are used by the extractors:
62bff2c1 486 extractor_retries: Number of times to retry for known errors
487 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 488 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 489 discontinuities such as ad breaks (default: False)
5d3a0e79 490 extractor_args: A dictionary of arguments to be passed to the extractors.
491 See "EXTRACTOR ARGUMENTS" for details.
492 Eg: {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 493 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
5d3a0e79 494 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
495 If True (default), DASH manifests and related
62bff2c1 496 data will be downloaded and processed by extractor.
497 You can reduce network I/O by disabling it if you don't
498 care about DASH. (only for youtube)
5d3a0e79 499 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
500 If True (default), HLS manifests and related
62bff2c1 501 data will be downloaded and processed by extractor.
502 You can reduce network I/O by disabling it if you don't
503 care about HLS. (only for youtube)
8222d8de 504 """
505
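    # A minimal usage sketch (URL and options are illustrative only):
    #
    #     from yt_dlp import YoutubeDL
    #     with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
    #         ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])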
c9969434 506 _NUMERIC_FIELDS = set((
507 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 508 'timestamp', 'release_timestamp',
c9969434 509 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
510 'average_rating', 'comment_count', 'age_limit',
511 'start_time', 'end_time',
512 'chapter_number', 'season_number', 'episode_number',
513 'track_number', 'disc_number', 'release_year',
c9969434 514 ))
515
6db9c4d5 516 _format_fields = {
517 # NB: Keep in sync with the docstring of extractor/common.py
518 'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
519 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
520 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
521 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
522 'preference', 'language', 'language_preference', 'quality', 'source_preference',
523 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
524 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
525 }
48ee10ee 526 _format_selection_exts = {
527 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
528 'video': {'mp4', 'flv', 'webm', '3gp'},
529 'storyboards': {'mhtml'},
530 }
531
3511266b 532 def __init__(self, params=None, auto_init=True):
883d4b1e 533 """Create a FileDownloader object with the given options.
534 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 535 Set to 'no_verbose_header' to not print the header
883d4b1e 536 """
e9f9a10f 537 if params is None:
538 params = {}
592b7485 539 self.params = params
8b7491c8 540 self._ies = {}
56c73665 541 self._ies_instances = {}
1e43a6f7 542 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 543 self._printed_messages = set()
1cf376f5 544 self._first_webpage_request = True
ab8e5e51 545 self._post_hooks = []
933605d7 546 self._progress_hooks = []
819e0531 547 self._postprocessor_hooks = []
8222d8de 548 self._download_retcode = 0
549 self._num_downloads = 0
9c906919 550 self._num_videos = 0
592b7485 551 self._playlist_level = 0
552 self._playlist_urls = set()
a0e07d31 553 self.cache = Cache(self)
34308b30 554
819e0531 555 windows_enable_vt_mode()
cf4f42cb 556 self._out_files = {
557 'error': sys.stderr,
558 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
559 'console': None if compat_os_name == 'nt' else next(
560 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
561 }
562 self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
ec11a9f4 563 self._allow_colors = {
cf4f42cb 564 type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
565 for type_ in ('screen', 'error')
ec11a9f4 566 }
819e0531 567
a61f4b28 568 if sys.version_info < (3, 6):
569 self.report_warning(
0181adef 570 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 571
88acdbc2 572 if self.params.get('allow_unplayable_formats'):
573 self.report_warning(
ec11a9f4 574 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 575 'This is a developer option intended for debugging. \n'
576 ' If you experience any issues while using this option, '
ec11a9f4 577 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 578
be5df5ee 579 def check_deprecated(param, option, suggestion):
580 if self.params.get(param) is not None:
53ed7066 581 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee 582 return True
583 return False
584
585 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791 586 if self.params.get('geo_verification_proxy') is None:
587 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
588
0d1bb027 589 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
590 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 591 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 592
49a57e70 593 for msg in self.params.get('_warnings', []):
0d1bb027 594 self.report_warning(msg)
ee8dd27a 595 for msg in self.params.get('_deprecation_warnings', []):
596 self.deprecation_warning(msg)
0d1bb027 597
ec11a9f4 598 if 'list-formats' in self.params.get('compat_opts', []):
599 self.params['listformats_table'] = False
600
b5ae35ee 601 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 602 # nooverwrites was unnecessarily changed to overwrites
603 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
604 # This ensures compatibility with both keys
605 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 606 elif self.params.get('overwrites') is None:
607 self.params.pop('overwrites', None)
b868936c 608 else:
609 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 610
455a15e2 611 self.params.setdefault('forceprint', {})
612 self.params.setdefault('print_to_file', {})
bb66c247 613
614 # Compatibility with older syntax
ca30f449 615 if not isinstance(params['forceprint'], dict):
455a15e2 616 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 617
455a15e2 618 if self.params.get('bidi_workaround', False):
1c088fa8 619 try:
620 import pty
621 master, slave = pty.openpty()
003c69a8 622 width = compat_get_terminal_size().columns
1c088fa8 623 if width is None:
624 width_args = []
625 else:
626 width_args = ['-w', str(width)]
5d681e96 627 sp_kwargs = dict(
1c088fa8 628 stdin=subprocess.PIPE,
629 stdout=slave,
cf4f42cb 630 stderr=self._out_files['error'])
5d681e96 631 try:
d3c93ec2 632 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
5d681e96 633 except OSError:
d3c93ec2 634 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
5d681e96 635 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 636 except OSError as ose:
66e7ace1 637 if ose.errno == errno.ENOENT:
49a57e70 638 self.report_warning(
639 'Could not find fribidi executable, ignoring --bidi-workaround. '
640 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8 641 else:
642 raise
0783b09b 643
3089bc74 644 if (sys.platform != 'win32'
645 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 646 and not self.params.get('restrictfilenames', False)):
e9137224 647 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 648 self.report_warning(
6febd1c1 649 'Assuming --restrict-filenames since file system encoding '
1b725173 650 'cannot encode all characters. '
6febd1c1 651 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 652 self.params['restrictfilenames'] = True
34308b30 653
de6000d9 654 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 655
187986a8 656 # Creating format selector here allows us to catch syntax errors before the extraction
657 self.format_selector = (
fa9f30b8 658 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 659 else self.params['format'] if callable(self.params['format'])
187986a8 660 else self.build_format_selector(self.params['format']))
661
8b7539d2 662 # Set http_headers defaults according to std_headers
663 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
664
dca08720 665 self._setup_opener()
666
3511266b 667 if auto_init:
883d4b1e 668 if auto_init != 'no_verbose_header':
669 self.print_debug_header()
3511266b 670 self.add_default_info_extractors()
671
013b50b7 672 hooks = {
673 'post_hooks': self.add_post_hook,
674 'progress_hooks': self.add_progress_hook,
675 'postprocessor_hooks': self.add_postprocessor_hook,
676 }
677 for opt, fn in hooks.items():
678 for ph in self.params.get(opt, []):
679 fn(ph)
71b640cc 680
5bfc8bee 681 for pp_def_raw in self.params.get('postprocessors', []):
682 pp_def = dict(pp_def_raw)
683 when = pp_def.pop('when', 'post_process')
684 self.add_post_processor(
685 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
686 when=when)
687
51fb4995 688 register_socks_protocols()
689
ed39cac5 690 def preload_download_archive(fn):
691 """Preload the archive, if any is specified"""
692 if fn is None:
693 return False
49a57e70 694 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 695 try:
696 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
697 for line in archive_file:
698 self.archive.add(line.strip())
699 except IOError as ioe:
700 if ioe.errno != errno.ENOENT:
701 raise
702 return False
703 return True
704
705 self.archive = set()
706 preload_download_archive(self.params.get('download_archive'))
707
7d4111ed 708 def warn_if_short_id(self, argv):
709 # short YouTube ID starting with dash?
710 idxs = [
711 i for i, a in enumerate(argv)
712 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
713 if idxs:
714 correct_argv = (
7a5c1cfe 715 ['yt-dlp']
3089bc74 716 + [a for i, a in enumerate(argv) if i not in idxs]
717 + ['--'] + [argv[i] for i in idxs]
7d4111ed 718 )
719 self.report_warning(
720 'Long argument string detected. '
49a57e70 721 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed 722 args_to_str(correct_argv))
723
8222d8de 724 def add_info_extractor(self, ie):
725 """Add an InfoExtractor object to the end of the list."""
8b7491c8 726 ie_key = ie.ie_key()
727 self._ies[ie_key] = ie
e52d7f85 728 if not isinstance(ie, type):
8b7491c8 729 self._ies_instances[ie_key] = ie
e52d7f85 730 ie.set_downloader(self)
8222d8de 731
8b7491c8 732 def _get_info_extractor_class(self, ie_key):
733 ie = self._ies.get(ie_key)
734 if ie is None:
735 ie = get_info_extractor(ie_key)
736 self.add_info_extractor(ie)
737 return ie
738
56c73665 739 def get_info_extractor(self, ie_key):
740 """
741 Get an instance of an IE with name ie_key, it will try to get one from
742 the _ies list, if there's no instance it will create a new one and add
743 it to the extractor list.
744 """
745 ie = self._ies_instances.get(ie_key)
746 if ie is None:
747 ie = get_info_extractor(ie_key)()
748 self.add_info_extractor(ie)
749 return ie
750
023fa8c4 751 def add_default_info_extractors(self):
752 """
753 Add the InfoExtractors returned by gen_extractors to the end of the list
754 """
e52d7f85 755 for ie in gen_extractor_classes():
023fa8c4 756 self.add_info_extractor(ie)
757
56d868db 758 def add_post_processor(self, pp, when='post_process'):
8222d8de 759 """Add a PostProcessor object to the end of the chain."""
5bfa4862 760 self._pps[when].append(pp)
8222d8de 761 pp.set_downloader(self)
762
ab8e5e51 763 def add_post_hook(self, ph):
764 """Add the post hook"""
765 self._post_hooks.append(ph)
766
933605d7 767 def add_progress_hook(self, ph):
819e0531 768 """Add the download progress hook"""
933605d7 769 self._progress_hooks.append(ph)
8ab470f1 770
819e0531 771 def add_postprocessor_hook(self, ph):
772 """Add the postprocessing progress hook"""
773 self._postprocessor_hooks.append(ph)
5bfc8bee 774 for pps in self._pps.values():
775 for pp in pps:
776 pp.add_progress_hook(ph)
819e0531 777
1c088fa8 778 def _bidi_workaround(self, message):
5d681e96 779 if not hasattr(self, '_output_channel'):
1c088fa8 780 return message
781
5d681e96 782 assert hasattr(self, '_output_process')
11b85ce6 783 assert isinstance(message, compat_str)
6febd1c1 784 line_count = message.count('\n') + 1
785 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 786 self._output_process.stdin.flush()
6febd1c1 787 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 788 for _ in range(line_count))
6febd1c1 789 return res[:-len('\n')]
1c088fa8 790
b35496d8 791 def _write_string(self, message, out=None, only_once=False):
792 if only_once:
793 if message in self._printed_messages:
794 return
795 self._printed_messages.add(message)
796 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 797
cf4f42cb 798 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 799 """Print message to stdout"""
cf4f42cb 800 if quiet is not None:
ae6a1b95 801 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
cf4f42cb 802 self._write_string(
803 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
804 self._out_files['print'])
805
806 def to_screen(self, message, skip_eol=False, quiet=None):
807 """Print message to screen if not in quiet mode"""
8bf9319e 808 if self.params.get('logger'):
43afe285 809 self.params['logger'].debug(message)
cf4f42cb 810 return
811 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
812 return
813 self._write_string(
814 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
815 self._out_files['screen'])
8222d8de 816
b35496d8 817 def to_stderr(self, message, only_once=False):
0760b0a7 818 """Print message to stderr"""
11b85ce6 819 assert isinstance(message, compat_str)
8bf9319e 820 if self.params.get('logger'):
43afe285 821 self.params['logger'].error(message)
822 else:
cf4f42cb 823 self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
824
825 def _send_console_code(self, code):
826 if compat_os_name == 'nt' or not self._out_files['console']:
827 return
828 self._write_string(code, self._out_files['console'])
8222d8de 829
1e5b9a95 830 def to_console_title(self, message):
831 if not self.params.get('consoletitle', False):
832 return
3efb96a6 833 message = remove_terminal_sequences(message)
4bede0d8 834 if compat_os_name == 'nt':
835 if ctypes.windll.kernel32.GetConsoleWindow():
836 # c_wchar_p() might not be necessary if `message` is
837 # already of type unicode()
838 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 839 else:
840 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 841
bdde425c 842 def save_console_title(self):
cf4f42cb 843 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 844 return
592b7485 845 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c 846
847 def restore_console_title(self):
cf4f42cb 848 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 849 return
592b7485 850 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c 851
852 def __enter__(self):
853 self.save_console_title()
854 return self
855
856 def __exit__(self, *args):
857 self.restore_console_title()
f89197d7 858
dca08720 859 if self.params.get('cookiefile') is not None:
1bab3437 860 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 861
fa9f30b8 862 def trouble(self, message=None, tb=None, is_error=True):
8222d8de 863 """Determine action to take when a download problem appears.
864
865 Depending on if the downloader has been configured to ignore
866 download errors or not, this method may throw an exception or
867 not when errors are found, after printing the message.
868
fa9f30b8 869 @param tb If given, is additional traceback information
870 @param is_error Whether to raise error according to ignorerrors
8222d8de 871 """
872 if message is not None:
873 self.to_stderr(message)
874 if self.params.get('verbose'):
875 if tb is None:
876 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 877 tb = ''
8222d8de 878 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 879 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 880 tb += encode_compat_str(traceback.format_exc())
8222d8de 881 else:
882 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 883 tb = ''.join(tb_data)
c19bc311 884 if tb:
885 self.to_stderr(tb)
fa9f30b8 886 if not is_error:
887 return
b1940459 888 if not self.params.get('ignoreerrors'):
8222d8de 889 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
890 exc_info = sys.exc_info()[1].exc_info
891 else:
892 exc_info = sys.exc_info()
893 raise DownloadError(message, exc_info)
894 self._download_retcode = 1
895
ec11a9f4 896 class Styles(Enum):
897 HEADERS = 'yellow'
f304da8a 898 EMPHASIS = 'light blue'
ec11a9f4 899 ID = 'green'
900 DELIM = 'blue'
901 ERROR = 'red'
902 WARNING = 'yellow'
ff51ed58 903 SUPPRESS = 'light black'
ec11a9f4 904
7578d77d 905 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
ec11a9f4 906 if test_encoding:
907 original_text = text
5c104538 908 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
909 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 910 text = text.encode(encoding, 'ignore').decode(encoding)
911 if fallback is not None and text != original_text:
912 text = fallback
913 if isinstance(f, self.Styles):
f304da8a 914 f = f.value
7578d77d 915 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 916
917 def _format_screen(self, *args, **kwargs):
7578d77d 918 return self._format_text(
cf4f42cb 919 self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
ec11a9f4 920
921 def _format_err(self, *args, **kwargs):
7578d77d 922 return self._format_text(
cf4f42cb 923 self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
819e0531 924
c84aeac6 925 def report_warning(self, message, only_once=False):
8222d8de 926 '''
927 Print the message to stderr, it will be prefixed with 'WARNING:'
928 If stderr is a tty file the 'WARNING:' will be colored
929 '''
6d07ce01 930 if self.params.get('logger') is not None:
931 self.params['logger'].warning(message)
8222d8de 932 else:
ad8915b7 933 if self.params.get('no_warnings'):
934 return
ec11a9f4 935 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 936
ee8dd27a 937 def deprecation_warning(self, message):
938 if self.params.get('logger') is not None:
939 self.params['logger'].warning(f'DeprecationWarning: {message}')
940 else:
941 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
942
fa9f30b8 943 def report_error(self, message, *args, **kwargs):
8222d8de 944 '''
945 Do the same as trouble, but prefixes the message with 'ERROR:', colored
946 in red if stderr is a tty file.
947 '''
fa9f30b8 948 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 949
b35496d8 950 def write_debug(self, message, only_once=False):
0760b0a7 951 '''Log debug message or Print message to stderr'''
952 if not self.params.get('verbose', False):
953 return
954 message = '[debug] %s' % message
955 if self.params.get('logger'):
956 self.params['logger'].debug(message)
957 else:
b35496d8 958 self.to_stderr(message, only_once)
0760b0a7 959
8222d8de 960 def report_file_already_downloaded(self, file_name):
961 """Report file has already been fully downloaded."""
962 try:
6febd1c1 963 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 964 except UnicodeEncodeError:
6febd1c1 965 self.to_screen('[download] The file has already been downloaded')
8222d8de 966
0c3d0f51 967 def report_file_delete(self, file_name):
968 """Report that existing file will be deleted."""
969 try:
c25228e5 970 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 971 except UnicodeEncodeError:
c25228e5 972 self.to_screen('Deleting existing file')
0c3d0f51 973
319b6059 974 def raise_no_formats(self, info, forced=False, *, msg=None):
1151c407 975 has_drm = info.get('__has_drm')
319b6059 976 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
977 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
978 if forced or not ignored:
1151c407 979 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 980 expected=has_drm or ignored or expected)
88acdbc2 981 else:
982 self.report_warning(msg)
983
de6000d9 984 def parse_outtmpl(self):
985 outtmpl_dict = self.params.get('outtmpl', {})
986 if not isinstance(outtmpl_dict, dict):
987 outtmpl_dict = {'default': outtmpl_dict}
71ce444a 988 # Remove spaces in the default template
989 if self.params.get('restrictfilenames'):
990 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
991 else:
992 sanitize = lambda x: x
de6000d9 993 outtmpl_dict.update({
71ce444a 994 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 995 if outtmpl_dict.get(k) is None})
de6000d9 996 for key, val in outtmpl_dict.items():
997 if isinstance(val, bytes):
998 self.report_warning(
999 'Parameter outtmpl is bytes, but should be a unicode string. '
1000 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
1001 return outtmpl_dict
1002
21cd8fae 1003 def get_output_path(self, dir_type='', filename=None):
1004 paths = self.params.get('paths', {})
1005 assert isinstance(paths, dict)
1006 path = os.path.join(
1007 expand_path(paths.get('home', '').strip()),
1008 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1009 filename or '')
1010
1011 # Temporary fix for #4787
1012 # 'Treat' all problem characters by passing filename through preferredencoding
1013 # to workaround encoding issues with subprocess on python2 @ Windows
1014 if sys.version_info < (3, 0) and sys.platform == 'win32':
1015 path = encodeFilename(path, True).decode(preferredencoding())
1016 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1017
76a264ac 1018 @staticmethod
901130bb 1019 def _outtmpl_expandpath(outtmpl):
1020 # expand_path translates '%%' into '%' and '$$' into '$'
1021 # correspondingly that is not what we want since we need to keep
1022 # '%%' intact for template dict substitution step. Working around
1023 # with boundary-alike separator hack.
1024 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1025 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1026
1027 # outtmpl should be expand_path'ed before template dict substitution
1028 # because meta fields may contain env variables we don't want to
1029 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1030 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1031 return expand_path(outtmpl).replace(sep, '')
1032
1033 @staticmethod
1034 def escape_outtmpl(outtmpl):
1035 ''' Escape any remaining strings like %s, %abc% etc. '''
1036 return re.sub(
1037 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1038 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1039 outtmpl)
1040
1041 @classmethod
1042 def validate_outtmpl(cls, outtmpl):
76a264ac 1043 ''' @return None or Exception object '''
7d1eb38a 1044 outtmpl = re.sub(
37893bb0 1045 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
7d1eb38a 1046 lambda mobj: f'{mobj.group(0)[:-1]}s',
1047 cls._outtmpl_expandpath(outtmpl))
76a264ac 1048 try:
7d1eb38a 1049 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1050 return None
1051 except ValueError as err:
1052 return err
1053
03b4de72 1054 @staticmethod
1055 def _copy_infodict(info_dict):
1056 info_dict = dict(info_dict)
09b49e1f 1057 info_dict.pop('__postprocessors', None)
03b4de72 1058 return info_dict
1059
e0fd9573 1060 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1061 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1062 @param sanitize Whether to sanitize the output as a filename.
1063 For backward compatibility, a function can also be passed
1064 """
1065
6e84b215 1066 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1067
03b4de72 1068 info_dict = self._copy_infodict(info_dict)
752cda38 1069 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1070 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1071 if info_dict.get('duration', None) is not None
1072 else None)
752cda38 1073 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
9c906919 1074 info_dict['video_autonumber'] = self._num_videos
752cda38 1075 if info_dict.get('resolution') is None:
1076 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1077
e6f21b3d 1078 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1079 # of %(field)s to %(field)0Nd for backward compatibility
1080 field_size_compat_map = {
ec11a9f4 1081 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1082 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1083 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1084 }
752cda38 1085
385a27fa 1086 TMPL_DICT = {}
37893bb0 1087 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
385a27fa 1088 MATH_FUNCTIONS = {
1089 '+': float.__add__,
1090 '-': float.__sub__,
1091 }
e625be0d 1092 # Field is of the form key1.key2...
1093 # where keys (except first) can be string, int or slice
2b8a2973 1094 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
e0fd9573 1095 MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
385a27fa 1096 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 1097 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1098 (?P<negate>-)?
385a27fa 1099 (?P<fields>{field})
1100 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 1101 (?:>(?P<strf_format>.+?))?
34baa9fd 1102 (?P<remaining>
1103 (?P<alternate>(?<!\\),[^|&)]+)?
1104 (?:&(?P<replacement>.*?))?
1105 (?:\|(?P<default>.*?))?
1106 )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
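        # Illustrative template fields this syntax accepts (not exhaustive):
        #   %(title)s                  plain field
        #   %(upload_date>%Y-%m-%d)s   strftime formatting after '>'
        #   %(view_count|0)s           default value after '|'
        #   %(filesize+1024)d          simple maths
        #   %(tags.0)s                 object traversal with '.'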
752cda38 1107
2b8a2973 1108 def _traverse_infodict(k):
1109 k = k.split('.')
1110 if k[0] == '':
1111 k.pop(0)
1112 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1113
752cda38 1114 def get_value(mdict):
1115 # Object traversal
2b8a2973 1116 value = _traverse_infodict(mdict['fields'])
752cda38 1117 # Negative
1118 if mdict['negate']:
1119 value = float_or_none(value)
1120 if value is not None:
1121 value *= -1
1122 # Do maths
385a27fa 1123 offset_key = mdict['maths']
1124 if offset_key:
752cda38 1125 value = float_or_none(value)
1126 operator = None
385a27fa 1127 while offset_key:
1128 item = re.match(
1129 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1130 offset_key).group(0)
1131 offset_key = offset_key[len(item):]
1132 if operator is None:
752cda38 1133 operator = MATH_FUNCTIONS[item]
385a27fa 1134 continue
1135 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1136 offset = float_or_none(item)
1137 if offset is None:
2b8a2973 1138 offset = float_or_none(_traverse_infodict(item))
385a27fa 1139 try:
1140 value = operator(value, multiplier * offset)
1141 except (TypeError, ZeroDivisionError):
1142 return None
1143 operator = None
752cda38 1144 # Datetime formatting
1145 if mdict['strf_format']:
7c37ff97 1146 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1147
1148 return value
1149
b868936c 1150 na = self.params.get('outtmpl_na_placeholder', 'NA')
1151
e0fd9573 1152 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1153 return sanitize_filename(str(value), restricted=restricted,
1154 is_id=re.search(r'(^|[_.])id(\.|$)', key))
1155
1156 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1157 sanitize = bool(sanitize)
1158
6e84b215 1159 def _dumpjson_default(obj):
1160 if isinstance(obj, (set, LazyList)):
1161 return list(obj)
adbc4ec4 1162 return repr(obj)
6e84b215 1163
752cda38 1164 def create_key(outer_mobj):
1165 if not outer_mobj.group('has_key'):
b836dc94 1166 return outer_mobj.group(0)
752cda38 1167 key = outer_mobj.group('key')
752cda38 1168 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1169 initial_field = mobj.group('fields') if mobj else ''
e978789f 1170 value, replacement, default = None, None, na
7c37ff97 1171 while mobj:
e625be0d 1172 mobj = mobj.groupdict()
7c37ff97 1173 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1174 value = get_value(mobj)
e978789f 1175 replacement = mobj['replacement']
7c37ff97 1176 if value is None and mobj['alternate']:
34baa9fd 1177 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1178 else:
1179 break
752cda38 1180
b868936c 1181 fmt = outer_mobj.group('format')
752cda38 1182 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1183 fmt = '0{:d}d'.format(field_size_compat_map[key])
1184
e978789f 1185 value = default if value is None else value if replacement is None else replacement
752cda38 1186
4476d2c7 1187 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1188 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1189 if fmt[-1] == 'l': # list
4476d2c7 1190 delim = '\n' if '#' in flags else ', '
9e907ebd 1191 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1192 elif fmt[-1] == 'j': # json
4476d2c7 1193 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1194 elif fmt[-1] == 'q': # quoted
4476d2c7 1195 value = map(str, variadic(value) if '#' in flags else [value])
1196 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1197 elif fmt[-1] == 'B': # bytes
f5aa5cfb 1198 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1199 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1200 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1201 value, fmt = unicodedata.normalize(
1202 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1203 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1204 value), str_fmt
e0fd9573 1205 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1206 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1207 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1208 factor=1024 if '#' in flags else 1000)
37893bb0 1209 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1210 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1211 elif fmt[-1] == 'c':
524e2e4f 1212 if value:
1213 value = str(value)[0]
76a264ac 1214 else:
524e2e4f 1215 fmt = str_fmt
76a264ac 1216 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1217 value = float_or_none(value)
752cda38 1218 if value is None:
1219 value, fmt = default, 's'
901130bb 1220
752cda38 1221 if sanitize:
1222 if fmt[-1] == 'r':
1223 # If value is an object, sanitize might convert it to a string
1224 # So we convert it to repr first
7d1eb38a 1225 value, fmt = repr(value), str_fmt
639f1cea 1226 if fmt[-1] in 'csr':
e0fd9573 1227 value = sanitizer(initial_field, value)
901130bb 1228
b868936c 1229 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1230 TMPL_DICT[key] = value
b868936c 1231 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1232
385a27fa 1233 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1234
819e0531 1235 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1236 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1237 return self.escape_outtmpl(outtmpl) % info_dict
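# Illustrative sketch (assuming info_dict carries title/id/ext):
#   self.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info_dict)
# would produce something like 'Some Title [abc123].mp4'.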
1238
de6000d9 1239 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1240 try:
b836dc94 1241 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
e0fd9573 1242 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1243 if not filename:
1244 return None
15da37c7 1245
6a0546e3 1246 if tmpl_type in ('default', 'temp'):
1247 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1248 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1249 filename = replace_extension(filename, ext, final_ext)
1250 else:
1251 force_ext = OUTTMPL_TYPES[tmpl_type]
1252 if force_ext:
1253 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1254
bdc3fd2f
U
1255 # https://github.com/blackjack4494/youtube-dlc/issues/85
1256 trim_file_name = self.params.get('trim_file_name', False)
1257 if trim_file_name:
5c22c63d 1258 no_ext, *ext = filename.rsplit('.', 2)
1259 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
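# Illustrative sketch: with --trim-filenames 10, 'a-very-long-title.en.mp4' is rsplit
# into ('a-very-long-title', 'en', 'mp4') and re-joined as 'a-very-lon.en.mp4'.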
bdc3fd2f 1260
0202b52a 1261 return filename
8222d8de 1262 except ValueError as err:
6febd1c1 1263 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1264 return None
1265
de6000d9 1266 def prepare_filename(self, info_dict, dir_type='', warn=False):
1267 """Generate the output filename."""
21cd8fae 1268
de6000d9 1269 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1270 if not filename and dir_type not in ('', 'temp'):
1271 return ''
de6000d9 1272
c84aeac6 1273 if warn:
21cd8fae 1274 if not self.params.get('paths'):
de6000d9 1275 pass
1276 elif filename == '-':
c84aeac6 1277 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1278 elif os.path.isabs(filename):
c84aeac6 1279 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1280 if filename == '-' or not filename:
1281 return filename
1282
21cd8fae 1283 return self.get_output_path(dir_type, filename)
0202b52a 1284
120fe513 1285 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1286 """ Returns None if the file should be downloaded """
8222d8de 1287
c77495e3 1288 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1289
8b0d7497 1290 def check_filter():
8b0d7497 1291 if 'title' in info_dict:
1292 # This can happen when we're just evaluating the playlist
1293 title = info_dict['title']
1294 matchtitle = self.params.get('matchtitle', False)
1295 if matchtitle:
1296 if not re.search(matchtitle, title, re.IGNORECASE):
1297 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1298 rejecttitle = self.params.get('rejecttitle', False)
1299 if rejecttitle:
1300 if re.search(rejecttitle, title, re.IGNORECASE):
1301 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1302 date = info_dict.get('upload_date')
1303 if date is not None:
1304 dateRange = self.params.get('daterange', DateRange())
1305 if date not in dateRange:
1306 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1307 view_count = info_dict.get('view_count')
1308 if view_count is not None:
1309 min_views = self.params.get('min_views')
1310 if min_views is not None and view_count < min_views:
1311 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1312 max_views = self.params.get('max_views')
1313 if max_views is not None and view_count > max_views:
1314 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1315 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1316 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1317
8f18aca8 1318 match_filter = self.params.get('match_filter')
1319 if match_filter is not None:
1320 try:
1321 ret = match_filter(info_dict, incomplete=incomplete)
1322 except TypeError:
1323 # For backward compatibility
1324 ret = None if incomplete else match_filter(info_dict)
1325 if ret is not None:
1326 return ret
8b0d7497 1327 return None
1328
c77495e3 1329 if self.in_download_archive(info_dict):
1330 reason = '%s has already been recorded in the archive' % video_title
1331 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1332 else:
1333 reason = check_filter()
1334 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1335 if reason is not None:
120fe513 1336 if not silent:
1337 self.to_screen('[download] ' + reason)
c77495e3 1338 if self.params.get(break_opt, False):
1339 raise break_err()
8b0d7497 1340 return reason
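# Illustrative note: a None return means the entry should be downloaded; otherwise the
# returned string (e.g. '"Foo" title did not match pattern "bar"') is printed, and
# break_on_existing / break_on_reject raise the corresponding exception above.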
fe7e0c98 1341
b6c45014
JMF
1342 @staticmethod
1343 def add_extra_info(info_dict, extra_info):
1344 '''Set the keys from extra_info in info dict if they are missing'''
1345 for key, value in extra_info.items():
1346 info_dict.setdefault(key, value)
1347
409e1828 1348 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1349 process=True, force_generic_extractor=False):
41d1cca3 1350 """
1351 Return the extracted information as a dictionary (for playlists, the resolved entries are under the 'entries' key).
1352
1353 Arguments:
1354 url -- URL to extract
1355
1356 Keyword arguments:
1357 download -- whether to download videos during extraction
1358 ie_key -- extractor key hint
1359 extra_info -- dictionary containing the extra values to add to each result
1360 process -- whether to resolve all unresolved references (URLs, playlist items),
1361 must be True for download to work.
1362 force_generic_extractor -- force using the generic extractor
1363 """
fe7e0c98 1364
409e1828 1365 if extra_info is None:
1366 extra_info = {}
1367
61aa5ba3 1368 if not ie_key and force_generic_extractor:
d22dec74
S
1369 ie_key = 'Generic'
1370
8222d8de 1371 if ie_key:
8b7491c8 1372 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1373 else:
1374 ies = self._ies
1375
8b7491c8 1376 for ie_key, ie in ies.items():
8222d8de
JMF
1377 if not ie.suitable(url):
1378 continue
1379
1380 if not ie.working():
6febd1c1
PH
1381 self.report_warning('The program functionality for this site has been marked as broken, '
1382 'and will probably not work.')
8222d8de 1383
1151c407 1384 temp_id = ie.get_temp_id(url)
a0566bbf 1385 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1386 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1387 if self.params.get('break_on_existing', False):
1388 raise ExistingVideoReached()
a0566bbf 1389 break
8b7491c8 1390 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1391 else:
1392 self.report_error('no suitable InfoExtractor for URL %s' % url)
1393
8e5fecc8 1394 def __handle_extraction_exceptions(func):
b5ae35ee 1395 @functools.wraps(func)
a0566bbf 1396 def wrapper(self, *args, **kwargs):
6da22e7d 1397 while True:
1398 try:
1399 return func(self, *args, **kwargs)
1400 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1401 raise
6da22e7d 1402 except ReExtractInfo as e:
1403 if e.expected:
1404 self.to_screen(f'{e}; Re-extracting data')
1405 else:
1406 self.to_stderr('\r')
1407 self.report_warning(f'{e}; Re-extracting data')
1408 continue
1409 except GeoRestrictedError as e:
1410 msg = e.msg
1411 if e.countries:
1412 msg += '\nThis video is available in %s.' % ', '.join(
1413 map(ISO3166Utils.short2full, e.countries))
1414 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1415 self.report_error(msg)
1416 except ExtractorError as e: # An error we somewhat expected
1417 self.report_error(str(e), e.format_traceback())
1418 except Exception as e:
1419 if self.params.get('ignoreerrors'):
1420 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1421 else:
1422 raise
1423 break
a0566bbf 1424 return wrapper
1425
f2ebc5c7 1426 def _wait_for_video(self, ie_result):
1427 if (not self.params.get('wait_for_video')
1428 or ie_result.get('_type', 'video') != 'video'
1429 or ie_result.get('formats') or ie_result.get('url')):
1430 return
1431
1432 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1433 last_msg = ''
1434
1435 def progress(msg):
1436 nonlocal last_msg
1437 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1438 last_msg = msg
1439
1440 min_wait, max_wait = self.params.get('wait_for_video')
1441 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1442 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1443 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1444 self.report_warning('Release time of video is not known')
1445 elif (diff or 0) <= 0:
1446 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1447 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1448 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1449
1450 wait_till = time.time() + diff
1451 try:
1452 while True:
1453 diff = wait_till - time.time()
1454 if diff <= 0:
1455 progress('')
1456 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1457 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1458 time.sleep(1)
1459 except KeyboardInterrupt:
1460 progress('')
1461 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1462 except BaseException as e:
1463 if not isinstance(e, ReExtractInfo):
1464 self.to_screen('')
1465 raise
1466
a0566bbf 1467 @__handle_extraction_exceptions
58f197b7 1468 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1469 ie_result = ie.extract(url)
1470 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1471 return
1472 if isinstance(ie_result, list):
1473 # Backwards compatibility: old IE result format
1474 ie_result = {
1475 '_type': 'compat_list',
1476 'entries': ie_result,
1477 }
e37d0efb 1478 if extra_info.get('original_url'):
1479 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1480 self.add_default_extra_info(ie_result, ie, url)
1481 if process:
f2ebc5c7 1482 self._wait_for_video(ie_result)
a0566bbf 1483 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1484 else:
a0566bbf 1485 return ie_result
fe7e0c98 1486
ea38e55f 1487 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1488 if url is not None:
1489 self.add_extra_info(ie_result, {
1490 'webpage_url': url,
1491 'original_url': url,
57ebfca3 1492 })
1493 webpage_url = ie_result.get('webpage_url')
1494 if webpage_url:
1495 self.add_extra_info(ie_result, {
1496 'webpage_url_basename': url_basename(webpage_url),
1497 'webpage_url_domain': get_domain(webpage_url),
6033d980 1498 })
1499 if ie is not None:
1500 self.add_extra_info(ie_result, {
1501 'extractor': ie.IE_NAME,
1502 'extractor_key': ie.ie_key(),
1503 })
ea38e55f 1504
58adec46 1505 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1506 """
1507 Take the result of the ie (may be modified) and resolve all unresolved
1508 references (URLs, playlist items).
1509
1510 It will also download the videos if 'download' is true.
1511 Returns the resolved ie_result.
1512 """
58adec46 1513 if extra_info is None:
1514 extra_info = {}
e8ee972c
PH
1515 result_type = ie_result.get('_type', 'video')
1516
057a5206 1517 if result_type in ('url', 'url_transparent'):
134c6ea8 1518 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1519 if ie_result.get('original_url'):
1520 extra_info.setdefault('original_url', ie_result['original_url'])
1521
057a5206 1522 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1523 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1524 or extract_flat is True):
ecb54191 1525 info_copy = ie_result.copy()
6033d980 1526 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1527 if ie and not ie_result.get('id'):
4614bc22 1528 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1529 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1530 self.add_extra_info(info_copy, extra_info)
b5475f11 1531 info_copy, _ = self.pre_process(info_copy)
ecb54191 1532 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1533 if self.params.get('force_write_download_archive', False):
1534 self.record_download_archive(info_copy)
e8ee972c
PH
1535 return ie_result
1536
8222d8de 1537 if result_type == 'video':
b6c45014 1538 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1539 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1540 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1541 if additional_urls:
e9f4ccd1 1542 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1543 if isinstance(additional_urls, compat_str):
1544 additional_urls = [additional_urls]
1545 self.to_screen(
1546 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1547 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1548 ie_result['additional_entries'] = [
1549 self.extract_info(
b69fd25c 1550 url, download, extra_info=extra_info,
9c2b75b5 1551 force_generic_extractor=self.params.get('force_generic_extractor'))
1552 for url in additional_urls
1553 ]
1554 return ie_result
8222d8de
JMF
1555 elif result_type == 'url':
1556 # We have to add extra_info to the results because it may be
1557 # contained in a playlist
07cce701 1558 return self.extract_info(
1559 ie_result['url'], download,
1560 ie_key=ie_result.get('ie_key'),
1561 extra_info=extra_info)
7fc3fa05
PH
1562 elif result_type == 'url_transparent':
1563 # Use the information from the embedding page
1564 info = self.extract_info(
1565 ie_result['url'], ie_key=ie_result.get('ie_key'),
1566 extra_info=extra_info, download=False, process=False)
1567
1640eb09
S
1568 # extract_info may return None when ignoreerrors is enabled and
1569 # extraction failed with an error, don't crash and return early
1570 # in this case
1571 if not info:
1572 return info
1573
412c617d
PH
1574 force_properties = dict(
1575 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1576 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1577 if f in force_properties:
1578 del force_properties[f]
1579 new_result = info.copy()
1580 new_result.update(force_properties)
7fc3fa05 1581
0563f7ac
S
1582 # Extracted info may not be a video result (i.e.
1583 # info.get('_type', 'video') != 'video') but rather a url or
1584 # url_transparent. In such cases outer metadata (from ie_result)
1585 # should be propagated to inner one (info). For this to happen
1586 # _type of info should be overridden with url_transparent. This
067aa17e 1587 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1588 if new_result.get('_type') == 'url':
1589 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1590
1591 return self.process_ie_result(
1592 new_result, download=download, extra_info=extra_info)
40fcba5e 1593 elif result_type in ('playlist', 'multi_video'):
30a074c2 1594 # Protect from infinite recursion due to recursively nested playlists
1595 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1596 webpage_url = ie_result['webpage_url']
1597 if webpage_url in self._playlist_urls:
7e85e872 1598 self.to_screen(
30a074c2 1599 '[download] Skipping already downloaded playlist: %s'
1600 % (ie_result.get('title') or ie_result.get('id')))
1601 return
7e85e872 1602
30a074c2 1603 self._playlist_level += 1
1604 self._playlist_urls.add(webpage_url)
03f83004 1605 self._fill_common_fields(ie_result, False)
bc516a3f 1606 self._sanitize_thumbnails(ie_result)
30a074c2 1607 try:
1608 return self.__process_playlist(ie_result, download)
1609 finally:
1610 self._playlist_level -= 1
1611 if not self._playlist_level:
1612 self._playlist_urls.clear()
8222d8de 1613 elif result_type == 'compat_list':
c9bf4114
PH
1614 self.report_warning(
1615 'Extractor %s returned a compat_list result. '
1616 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1617
8222d8de 1618 def _fixup(r):
b868936c 1619 self.add_extra_info(r, {
1620 'extractor': ie_result['extractor'],
1621 'webpage_url': ie_result['webpage_url'],
1622 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1623 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1624 'extractor_key': ie_result['extractor_key'],
1625 })
8222d8de
JMF
1626 return r
1627 ie_result['entries'] = [
b6c45014 1628 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1629 for r in ie_result['entries']
1630 ]
1631 return ie_result
1632 else:
1633 raise Exception('Invalid result type: %s' % result_type)
1634
e92caff5 1635 def _ensure_dir_exists(self, path):
1636 return make_dir(path, self.report_error)
1637
3b603dbd 1638 @staticmethod
1639 def _playlist_infodict(ie_result, **kwargs):
1640 return {
1641 **ie_result,
1642 'playlist': ie_result.get('title') or ie_result.get('id'),
1643 'playlist_id': ie_result.get('id'),
1644 'playlist_title': ie_result.get('title'),
1645 'playlist_uploader': ie_result.get('uploader'),
1646 'playlist_uploader_id': ie_result.get('uploader_id'),
1647 'playlist_index': 0,
1648 **kwargs,
1649 }
1650
30a074c2 1651 def __process_playlist(self, ie_result, download):
1652 # We process each entry in the playlist
1653 playlist = ie_result.get('title') or ie_result.get('id')
1654 self.to_screen('[download] Downloading playlist: %s' % playlist)
1655
498f5606 1656 if 'entries' not in ie_result:
aa9369a2 1657 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1658
1659 MissingEntry = object()
498f5606 1660 incomplete_entries = bool(ie_result.get('requested_entries'))
1661 if incomplete_entries:
bf5f605e 1662 def fill_missing_entries(entries, indices):
7c7f7161 1663 ret = [MissingEntry] * max(indices)
bf5f605e 1664 for i, entry in zip(indices, entries):
498f5606 1665 ret[i - 1] = entry
1666 return ret
1667 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
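# Illustrative sketch: requested_entries=[2, 4] with two extracted entries produces
# [MissingEntry, entry1, MissingEntry, entry2], keeping indices aligned with the
# original playlist positions.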
02fd60d3 1668
30a074c2 1669 playlist_results = []
1670
56a8fb4f 1671 playliststart = self.params.get('playliststart', 1)
30a074c2 1672 playlistend = self.params.get('playlistend')
1673 # For backwards compatibility, interpret -1 as whole list
1674 if playlistend == -1:
1675 playlistend = None
1676
1677 playlistitems_str = self.params.get('playlist_items')
1678 playlistitems = None
1679 if playlistitems_str is not None:
1680 def iter_playlistitems(format):
1681 for string_segment in format.split(','):
1682 if '-' in string_segment:
1683 start, end = string_segment.split('-')
1684 for item in range(int(start), int(end) + 1):
1685 yield int(item)
1686 else:
1687 yield int(string_segment)
1688 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
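# Illustrative sketch: --playlist-items '1-3,7' parses to [1, 2, 3, 7] here.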
1689
1690 ie_entries = ie_result['entries']
8e5fecc8 1691 if isinstance(ie_entries, list):
ed8d87f9 1692 playlist_count = len(ie_entries)
f0d785d3 1693 msg = f'Collected {playlist_count} videos; downloading %d of them'
1694 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1695
8e5fecc8 1696 def get_entry(i):
1697 return ie_entries[i - 1]
1698 else:
f0d785d3 1699 msg = 'Downloading %d videos'
c586f9e8 1700 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1701 ie_entries = LazyList(ie_entries)
d37707bd 1702 elif isinstance(ie_entries, InAdvancePagedList):
1703 if ie_entries._pagesize == 1:
1704 playlist_count = ie_entries._pagecount
8e5fecc8 1705
1706 def get_entry(i):
1707 return YoutubeDL.__handle_extraction_exceptions(
1708 lambda self, i: ie_entries[i - 1]
1709 )(self, i)
50fed816 1710
f0d785d3 1711 entries, broken = [], False
ff1c7fc9 1712 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1713 for i in items:
1714 if i == 0:
1715 continue
56a8fb4f 1716 if playlistitems is None and playlistend is not None and playlistend < i:
1717 break
1718 entry = None
1719 try:
50fed816 1720 entry = get_entry(i)
7c7f7161 1721 if entry is MissingEntry:
498f5606 1722 raise EntryNotInPlaylist()
56a8fb4f 1723 except (IndexError, EntryNotInPlaylist):
1724 if incomplete_entries:
aa9369a2 1725 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1726 elif not playlistitems:
1727 break
1728 entries.append(entry)
120fe513 1729 try:
1730 if entry is not None:
1731 self._match_entry(entry, incomplete=True, silent=True)
1732 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1733 broken = True
120fe513 1734 break
56a8fb4f 1735 ie_result['entries'] = entries
30a074c2 1736
56a8fb4f 1737 # Save playlist_index before re-ordering
1738 entries = [
9e598870 1739 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1740 for i, entry in enumerate(entries, 1)
1741 if entry is not None]
1742 n_entries = len(entries)
498f5606 1743
f0d785d3 1744 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1745 ie_result['playlist_count'] = n_entries
1746
e08a85d8 1747 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1748 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1749 ie_result['requested_entries'] = playlistitems
1750
e08a85d8 1751 _infojson_written = False
0bfc53d0 1752 write_playlist_files = self.params.get('allow_playlist_files', True)
1753 if write_playlist_files and self.params.get('list_thumbnails'):
1754 self.list_thumbnails(ie_result)
1755 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1756 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1757 _infojson_written = self._write_info_json(
1758 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1759 if _infojson_written is None:
80c03fa9 1760 return
1761 if self._write_description('playlist', ie_result,
1762 self.prepare_filename(ie_copy, 'pl_description')) is None:
1763 return
681de68e 1764 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1765 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1766
1767 if self.params.get('playlistreverse', False):
1768 entries = entries[::-1]
30a074c2 1769 if self.params.get('playlistrandom', False):
1770 random.shuffle(entries)
1771
1772 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1773
56a8fb4f 1774 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1775 failures = 0
1776 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1777 for i, entry_tuple in enumerate(entries, 1):
1778 playlist_index, entry = entry_tuple
81139999 1779 if 'playlist-index' in self.params.get('compat_opts', []):
1780 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1781 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1782 # This __x_forwarded_for_ip thing is a bit ugly but requires
1783 # minimal changes
1784 if x_forwarded_for:
1785 entry['__x_forwarded_for_ip'] = x_forwarded_for
1786 extra = {
1787 'n_entries': n_entries,
f59ae581 1788 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1789 'playlist_count': ie_result.get('playlist_count'),
71729754 1790 'playlist_index': playlist_index,
1791 'playlist_autonumber': i,
30a074c2 1792 'playlist': playlist,
1793 'playlist_id': ie_result.get('id'),
1794 'playlist_title': ie_result.get('title'),
1795 'playlist_uploader': ie_result.get('uploader'),
1796 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1797 'extractor': ie_result['extractor'],
1798 'webpage_url': ie_result['webpage_url'],
1799 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1800 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1801 'extractor_key': ie_result['extractor_key'],
1802 }
1803
1804 if self._match_entry(entry, incomplete=True) is not None:
1805 continue
1806
1807 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1808 if not entry_result:
1809 failures += 1
1810 if failures >= max_failures:
1811 self.report_error(
1812 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1813 break
30a074c2 1814 playlist_results.append(entry_result)
1815 ie_result['entries'] = playlist_results
e08a85d8 1816
1817 # Write the updated info to json
1818 if _infojson_written and self._write_info_json(
1819 'updated playlist', ie_result,
1820 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1821 return
ca30f449 1822
ed5835b4 1823 ie_result = self.run_all_pps('playlist', ie_result)
1824 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1825 return ie_result
1826
a0566bbf 1827 @__handle_extraction_exceptions
1828 def __process_iterable_entry(self, entry, download, extra_info):
1829 return self.process_ie_result(
1830 entry, download=download, extra_info=extra_info)
1831
67134eab
JMF
1832 def _build_format_filter(self, filter_spec):
1833 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1834
1835 OPERATORS = {
1836 '<': operator.lt,
1837 '<=': operator.le,
1838 '>': operator.gt,
1839 '>=': operator.ge,
1840 '=': operator.eq,
1841 '!=': operator.ne,
1842 }
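# Illustrative sketch: numeric filter specs matched below include 'height<=720',
# 'filesize>100M' and 'fps!=30'; string specs such as 'ext=mp4' or 'format_id!*=dash'
# are handled by the STR_OPERATORS branch further down.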
67134eab 1843 operator_rex = re.compile(r'''(?x)\s*
187986a8 1844 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1845 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1846 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1847 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1848 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1849 if m:
1850 try:
1851 comparison_value = int(m.group('value'))
1852 except ValueError:
1853 comparison_value = parse_filesize(m.group('value'))
1854 if comparison_value is None:
1855 comparison_value = parse_filesize(m.group('value') + 'B')
1856 if comparison_value is None:
1857 raise ValueError(
1858 'Invalid value %r in format specification %r' % (
67134eab 1859 m.group('value'), filter_spec))
9ddb6925
S
1860 op = OPERATORS[m.group('op')]
1861
083c9df9 1862 if not m:
9ddb6925
S
1863 STR_OPERATORS = {
1864 '=': operator.eq,
10d33b34
YCH
1865 '^=': lambda attr, value: attr.startswith(value),
1866 '$=': lambda attr, value: attr.endswith(value),
1867 '*=': lambda attr, value: value in attr,
1ce9a3cb 1868 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1869 }
187986a8 1870 str_operator_rex = re.compile(r'''(?x)\s*
1871 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1872 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1873 (?P<quote>["'])?
1874 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1875 (?(quote)(?P=quote))\s*
9ddb6925 1876 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1877 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1878 if m:
1ce9a3cb
LF
1879 if m.group('op') == '~=':
1880 comparison_value = re.compile(m.group('value'))
1881 else:
1882 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1883 str_op = STR_OPERATORS[m.group('op')]
1884 if m.group('negation'):
e118a879 1885 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1886 else:
1887 op = str_op
083c9df9 1888
9ddb6925 1889 if not m:
187986a8 1890 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1891
1892 def _filter(f):
1893 actual_value = f.get(m.group('key'))
1894 if actual_value is None:
1895 return m.group('none_inclusive')
1896 return op(actual_value, comparison_value)
67134eab
JMF
1897 return _filter
1898
9f1a1c36 1899 def _check_formats(self, formats):
1900 for f in formats:
1901 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1902 path = self.get_output_path('temp')
1903 if not self._ensure_dir_exists(f'{path}/'):
1904 continue
1905 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1906 temp_file.close()
1907 try:
1908 success, _ = self.dl(temp_file.name, f, test=True)
1909 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1910 success = False
1911 finally:
1912 if os.path.exists(temp_file.name):
1913 try:
1914 os.remove(temp_file.name)
1915 except OSError:
1916 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1917 if success:
1918 yield f
1919 else:
1920 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1921
0017d9ad 1922 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1923
af0f7428
S
1924 def can_merge():
1925 merger = FFmpegMergerPP(self)
1926 return merger.available and merger.can_merge()
1927
91ebc640 1928 prefer_best = (
b7b04c78 1929 not self.params.get('simulate')
91ebc640 1930 and download
1931 and (
1932 not can_merge()
19807826 1933 or info_dict.get('is_live', False)
de6000d9 1934 or self.outtmpl_dict['default'] == '-'))
53ed7066 1935 compat = (
1936 prefer_best
1937 or self.params.get('allow_multiple_audio_streams', False)
1938 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1939
1940 return (
53ed7066 1941 'best/bestvideo+bestaudio' if prefer_best
1942 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1943 else 'bestvideo+bestaudio/best')
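# Illustrative note: absent compat options, this normally yields 'bestvideo*+bestaudio/best';
# when merging is not possible (no working ffmpeg, a live stream, or output to '-') it
# falls back to 'best/bestvideo+bestaudio'.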
0017d9ad 1944
67134eab
JMF
1945 def build_format_selector(self, format_spec):
1946 def syntax_error(note, start):
1947 message = (
1948 'Invalid format specification: '
1949 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1950 return SyntaxError(message)
1951
1952 PICKFIRST = 'PICKFIRST'
1953 MERGE = 'MERGE'
1954 SINGLE = 'SINGLE'
0130afb7 1955 GROUP = 'GROUP'
67134eab
JMF
1956 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1957
91ebc640 1958 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1959 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1960
9f1a1c36 1961 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1962
67134eab
JMF
1963 def _parse_filter(tokens):
1964 filter_parts = []
1965 for type, string, start, _, _ in tokens:
1966 if type == tokenize.OP and string == ']':
1967 return ''.join(filter_parts)
1968 else:
1969 filter_parts.append(string)
1970
232541df 1971 def _remove_unused_ops(tokens):
17cc1534 1972 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1973 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1974 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1975 last_string, last_start, last_end, last_line = None, None, None, None
1976 for type, string, start, end, line in tokens:
1977 if type == tokenize.OP and string == '[':
1978 if last_string:
1979 yield tokenize.NAME, last_string, last_start, last_end, last_line
1980 last_string = None
1981 yield type, string, start, end, line
1982 # everything inside brackets will be handled by _parse_filter
1983 for type, string, start, end, line in tokens:
1984 yield type, string, start, end, line
1985 if type == tokenize.OP and string == ']':
1986 break
1987 elif type == tokenize.OP and string in ALLOWED_OPS:
1988 if last_string:
1989 yield tokenize.NAME, last_string, last_start, last_end, last_line
1990 last_string = None
1991 yield type, string, start, end, line
1992 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1993 if not last_string:
1994 last_string = string
1995 last_start = start
1996 last_end = end
1997 else:
1998 last_string += string
1999 if last_string:
2000 yield tokenize.NAME, last_string, last_start, last_end, last_line
2001
cf2ac6df 2002 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2003 selectors = []
2004 current_selector = None
2005 for type, string, start, _, _ in tokens:
2006 # ENCODING is only defined in python 3.x
2007 if type == getattr(tokenize, 'ENCODING', None):
2008 continue
2009 elif type in [tokenize.NAME, tokenize.NUMBER]:
2010 current_selector = FormatSelector(SINGLE, string, [])
2011 elif type == tokenize.OP:
cf2ac6df
JMF
2012 if string == ')':
2013 if not inside_group:
2014 # ')' will be handled by the parentheses group
2015 tokens.restore_last_token()
67134eab 2016 break
cf2ac6df 2017 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2018 tokens.restore_last_token()
2019 break
cf2ac6df
JMF
2020 elif inside_choice and string == ',':
2021 tokens.restore_last_token()
2022 break
2023 elif string == ',':
0a31a350
JMF
2024 if not current_selector:
2025 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2026 selectors.append(current_selector)
2027 current_selector = None
2028 elif string == '/':
d96d604e
JMF
2029 if not current_selector:
2030 raise syntax_error('"/" must follow a format selector', start)
67134eab 2031 first_choice = current_selector
cf2ac6df 2032 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2033 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2034 elif string == '[':
2035 if not current_selector:
2036 current_selector = FormatSelector(SINGLE, 'best', [])
2037 format_filter = _parse_filter(tokens)
2038 current_selector.filters.append(format_filter)
0130afb7
JMF
2039 elif string == '(':
2040 if current_selector:
2041 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2042 group = _parse_format_selection(tokens, inside_group=True)
2043 current_selector = FormatSelector(GROUP, group, [])
67134eab 2044 elif string == '+':
d03cfdce 2045 if not current_selector:
2046 raise syntax_error('Unexpected "+"', start)
2047 selector_1 = current_selector
2048 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2049 if not selector_2:
2050 raise syntax_error('Expected a selector', start)
2051 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
2052 else:
2053 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2054 elif type == tokenize.ENDMARKER:
2055 break
2056 if current_selector:
2057 selectors.append(current_selector)
2058 return selectors
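# Illustrative sketch: a spec such as 'bv*+ba/b' parses into nested FormatSelector
# tuples (a MERGE of the two atoms inside a PICKFIRST whose fallback is 'b'), which
# _build_selector_function below turns into a callable over the ctx dict.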
2059
f8d4ad9a 2060 def _merge(formats_pair):
2061 format_1, format_2 = formats_pair
2062
2063 formats_info = []
2064 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2065 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2066
2067 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2068 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2069 for (i, fmt_info) in enumerate(formats_info):
551f9388 2070 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2071 formats_info.pop(i)
2072 continue
2073 for aud_vid in ['audio', 'video']:
f8d4ad9a 2074 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2075 if get_no_more[aud_vid]:
2076 formats_info.pop(i)
f5510afe 2077 break
f8d4ad9a 2078 get_no_more[aud_vid] = True
2079
2080 if len(formats_info) == 1:
2081 return formats_info[0]
2082
2083 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2084 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2085
2086 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2087 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2088
2089 output_ext = self.params.get('merge_output_format')
2090 if not output_ext:
2091 if the_only_video:
2092 output_ext = the_only_video['ext']
2093 elif the_only_audio and not video_fmts:
2094 output_ext = the_only_audio['ext']
2095 else:
2096 output_ext = 'mkv'
2097
975a0d0d 2098 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2099
f8d4ad9a 2100 new_dict = {
2101 'requested_formats': formats_info,
975a0d0d 2102 'format': '+'.join(filtered('format')),
2103 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2104 'ext': output_ext,
975a0d0d 2105 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2106 'language': '+'.join(orderedSet(filtered('language'))) or None,
2107 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2108 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2109 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2110 }
2111
2112 if the_only_video:
2113 new_dict.update({
2114 'width': the_only_video.get('width'),
2115 'height': the_only_video.get('height'),
2116 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2117 'fps': the_only_video.get('fps'),
49a57e70 2118 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2119 'vcodec': the_only_video.get('vcodec'),
2120 'vbr': the_only_video.get('vbr'),
2121 'stretched_ratio': the_only_video.get('stretched_ratio'),
2122 })
2123
2124 if the_only_audio:
2125 new_dict.update({
2126 'acodec': the_only_audio.get('acodec'),
2127 'abr': the_only_audio.get('abr'),
975a0d0d 2128 'asr': the_only_audio.get('asr'),
f8d4ad9a 2129 })
2130
2131 return new_dict
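# Illustrative note: merging a video-only and an audio-only format yields a synthetic
# dict whose 'requested_formats' lists both, with format_id joined by '+' and ext taken
# from --merge-output-format, the single video's ext, or 'mkv'.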
2132
e8e73840 2133 def _check_formats(formats):
981052c9 2134 if not check_formats:
2135 yield from formats
b5ac45b1 2136 return
9f1a1c36 2137 yield from self._check_formats(formats)
e8e73840 2138
67134eab 2139 def _build_selector_function(selector):
909d24dd 2140 if isinstance(selector, list): # ,
67134eab
JMF
2141 fs = [_build_selector_function(s) for s in selector]
2142
317f7ab6 2143 def selector_function(ctx):
67134eab 2144 for f in fs:
981052c9 2145 yield from f(ctx)
67134eab 2146 return selector_function
909d24dd 2147
2148 elif selector.type == GROUP: # ()
0130afb7 2149 selector_function = _build_selector_function(selector.selector)
909d24dd 2150
2151 elif selector.type == PICKFIRST: # /
67134eab
JMF
2152 fs = [_build_selector_function(s) for s in selector.selector]
2153
317f7ab6 2154 def selector_function(ctx):
67134eab 2155 for f in fs:
317f7ab6 2156 picked_formats = list(f(ctx))
67134eab
JMF
2157 if picked_formats:
2158 return picked_formats
2159 return []
67134eab 2160
981052c9 2161 elif selector.type == MERGE: # +
2162 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2163
2164 def selector_function(ctx):
adbc4ec4 2165 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2166 yield _merge(pair)
2167
909d24dd 2168 elif selector.type == SINGLE: # atom
598d185d 2169 format_spec = selector.selector or 'best'
909d24dd 2170
f8d4ad9a 2171 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2172 if format_spec == 'all':
2173 def selector_function(ctx):
9222c381 2174 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2175 elif format_spec == 'mergeall':
2176 def selector_function(ctx):
dd2a987d 2177 formats = list(_check_formats(ctx['formats']))
e01d6aa4 2178 if not formats:
2179 return
921b76ca 2180 merged_format = formats[-1]
2181 for f in formats[-2::-1]:
f8d4ad9a 2182 merged_format = _merge((merged_format, f))
2183 yield merged_format
909d24dd 2184
2185 else:
e8e73840 2186 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 2187 mobj = re.match(
2188 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2189 format_spec)
2190 if mobj is not None:
2191 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2192 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2193 format_type = (mobj.group('type') or [None])[0]
2194 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2195 format_modified = mobj.group('mod') is not None
909d24dd 2196
2197 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2198 _filter_f = (
eff63539 2199 (lambda f: f.get('%scodec' % format_type) != 'none')
2200 if format_type and format_modified # bv*, ba*, wv*, wa*
2201 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2202 if format_type # bv, ba, wv, wa
2203 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2204 if not format_modified # b, w
8326b00a 2205 else lambda f: True) # b*, w*
2206 filter_f = lambda f: _filter_f(f) and (
2207 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2208 else:
48ee10ee 2209 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2210 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2211 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2212 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
48ee10ee 2213 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2214 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2215 else:
b5ae35ee 2216 filter_f = lambda f: f.get('format_id') == format_spec # id
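# Illustrative sketch: 'bv' keeps video-only formats (acodec == 'none'), 'ba*' keeps any
# format containing audio, 'best.2' picks the second-best complete format, and a bare
# extension ('mp4', 'm4a', ...) or an exact format_id is matched by the branch just above.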
909d24dd 2217
2218 def selector_function(ctx):
2219 formats = list(ctx['formats'])
909d24dd 2220 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 2221 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 2222 # for extractors with incomplete formats (audio only (soundcloud)
2223 # or video only (imgur)) best/worst will fallback to
2224 # best/worst {video,audio}-only format
e8e73840 2225 matches = formats
981052c9 2226 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2227 try:
e8e73840 2228 yield matches[format_idx - 1]
981052c9 2229 except IndexError:
2230 return
083c9df9 2231
67134eab 2232 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2233
317f7ab6 2234 def final_selector(ctx):
adbc4ec4 2235 ctx_copy = dict(ctx)
67134eab 2236 for _filter in filters:
317f7ab6
S
2237 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2238 return selector_function(ctx_copy)
67134eab 2239 return final_selector
083c9df9 2240
67134eab 2241 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2242 try:
232541df 2243 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2244 except tokenize.TokenError:
2245 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2246
2247 class TokenIterator(object):
2248 def __init__(self, tokens):
2249 self.tokens = tokens
2250 self.counter = 0
2251
2252 def __iter__(self):
2253 return self
2254
2255 def __next__(self):
2256 if self.counter >= len(self.tokens):
2257 raise StopIteration()
2258 value = self.tokens[self.counter]
2259 self.counter += 1
2260 return value
2261
2262 next = __next__
2263
2264 def restore_last_token(self):
2265 self.counter -= 1
2266
2267 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2268 return _build_selector_function(parsed_selector)
a9c58ad9 2269
e5660ee6 2270 def _calc_headers(self, info_dict):
8b7539d2 2271 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6
JMF
2272
2273 cookies = self._calc_cookies(info_dict)
2274 if cookies:
2275 res['Cookie'] = cookies
2276
0016b84e
S
2277 if 'X-Forwarded-For' not in res:
2278 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2279 if x_forwarded_for_ip:
2280 res['X-Forwarded-For'] = x_forwarded_for_ip
2281
e5660ee6
JMF
2282 return res
2283
2284 def _calc_cookies(self, info_dict):
5c2266df 2285 pr = sanitized_Request(info_dict['url'])
e5660ee6 2286 self.cookiejar.add_cookie_header(pr)
662435f7 2287 return pr.get_header('Cookie')
e5660ee6 2288
9f1a1c36 2289 def _sort_thumbnails(self, thumbnails):
2290 thumbnails.sort(key=lambda t: (
2291 t.get('preference') if t.get('preference') is not None else -1,
2292 t.get('width') if t.get('width') is not None else -1,
2293 t.get('height') if t.get('height') is not None else -1,
2294 t.get('id') if t.get('id') is not None else '',
2295 t.get('url')))
2296
b0249bca 2297 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2298 thumbnails = info_dict.get('thumbnails')
2299 if thumbnails is None:
2300 thumbnail = info_dict.get('thumbnail')
2301 if thumbnail:
2302 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2303 if not thumbnails:
2304 return
2305
2306 def check_thumbnails(thumbnails):
2307 for t in thumbnails:
2308 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2309 try:
2310 self.urlopen(HEADRequest(t['url']))
2311 except network_exceptions as err:
2312 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2313 continue
2314 yield t
2315
2316 self._sort_thumbnails(thumbnails)
2317 for i, t in enumerate(thumbnails):
2318 if t.get('id') is None:
2319 t['id'] = '%d' % i
2320 if t.get('width') and t.get('height'):
2321 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2322 t['url'] = sanitize_url(t['url'])
2323
2324 if self.params.get('check_formats') is True:
282f5709 2325 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2326 else:
2327 info_dict['thumbnails'] = thumbnails
bc516a3f 2328
03f83004
LNO
2329 def _fill_common_fields(self, info_dict, is_video=True):
2330 # TODO: move sanitization here
2331 if is_video:
2332 # playlists are allowed to lack "title"
2333 info_dict['fulltitle'] = info_dict.get('title')
2334 if 'title' not in info_dict:
2335 raise ExtractorError('Missing "title" field in extractor result',
2336 video_id=info_dict['id'], ie=info_dict['extractor'])
2337 elif not info_dict.get('title'):
2338 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2339 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2340
2341 if info_dict.get('duration') is not None:
2342 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2343
2344 for ts_key, date_key in (
2345 ('timestamp', 'upload_date'),
2346 ('release_timestamp', 'release_date'),
2347 ('modified_timestamp', 'modified_date'),
2348 ):
2349 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2350 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2351 # see http://bugs.python.org/issue1646728)
2352 try:
2353 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2354 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2355 except (ValueError, OverflowError, OSError):
2356 pass
2357
2358 live_keys = ('is_live', 'was_live')
2359 live_status = info_dict.get('live_status')
2360 if live_status is None:
2361 for key in live_keys:
2362 if info_dict.get(key) is False:
2363 continue
2364 if info_dict.get(key):
2365 live_status = key
2366 break
2367 if all(info_dict.get(key) is False for key in live_keys):
2368 live_status = 'not_live'
2369 if live_status:
2370 info_dict['live_status'] = live_status
2371 for key in live_keys:
2372 if info_dict.get(key) is None:
2373 info_dict[key] = (live_status == key)
2374
2375 # Auto generate title fields corresponding to the *_number fields when missing
2376 # in order to always have clean titles. This is very common for TV series.
2377 for field in ('chapter', 'season', 'episode'):
2378 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2379 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
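# Illustrative sketch: season_number=2 with no 'season' field set results in
# season='Season 2'; likewise for chapter and episode.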
2380
dd82ffea
JMF
2381 def process_video_result(self, info_dict, download=True):
2382 assert info_dict.get('_type', 'video') == 'video'
9c906919 2383 self._num_videos += 1
dd82ffea 2384
bec1fad2 2385 if 'id' not in info_dict:
fc08bdd6 2386 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2387 elif not info_dict.get('id'):
2388 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2389
c9969434
S
2390 def report_force_conversion(field, field_not, conversion):
2391 self.report_warning(
2392 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2393 % (field, field_not, conversion))
2394
2395 def sanitize_string_field(info, string_field):
2396 field = info.get(string_field)
2397 if field is None or isinstance(field, compat_str):
2398 return
2399 report_force_conversion(string_field, 'a string', 'string')
2400 info[string_field] = compat_str(field)
2401
2402 def sanitize_numeric_fields(info):
2403 for numeric_field in self._NUMERIC_FIELDS:
2404 field = info.get(numeric_field)
2405 if field is None or isinstance(field, compat_numeric_types):
2406 continue
2407 report_force_conversion(numeric_field, 'numeric', 'int')
2408 info[numeric_field] = int_or_none(field)
2409
2410 sanitize_string_field(info_dict, 'id')
2411 sanitize_numeric_fields(info_dict)
4c3f8c3f 2412 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2413 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2414
dd82ffea
JMF
2415 if 'playlist' not in info_dict:
2416 # It isn't part of a playlist
2417 info_dict['playlist'] = None
2418 info_dict['playlist_index'] = None
2419
bc516a3f 2420 self._sanitize_thumbnails(info_dict)
d5519808 2421
536a55da 2422 thumbnail = info_dict.get('thumbnail')
bc516a3f 2423 thumbnails = info_dict.get('thumbnails')
536a55da
S
2424 if thumbnail:
2425 info_dict['thumbnail'] = sanitize_url(thumbnail)
2426 elif thumbnails:
d5519808
PH
2427 info_dict['thumbnail'] = thumbnails[-1]['url']
2428
ae30b840 2429 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2430 info_dict['display_id'] = info_dict['id']
2431
03f83004 2432 self._fill_common_fields(info_dict)
33d2fc2f 2433
05108a49
S
2434 for cc_kind in ('subtitles', 'automatic_captions'):
2435 cc = info_dict.get(cc_kind)
2436 if cc:
2437 for _, subtitle in cc.items():
2438 for subtitle_format in subtitle:
2439 if subtitle_format.get('url'):
2440 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2441 if subtitle_format.get('ext') is None:
2442 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2443
2444 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2445 subtitles = info_dict.get('subtitles')
4bba3716 2446
360e1ca5 2447 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2448 info_dict['id'], subtitles, automatic_captions)
a504ced0 2449
dd82ffea
JMF
2450 if info_dict.get('formats') is None:
2451 # There's only one format available
2452 formats = [info_dict]
2453 else:
2454 formats = info_dict['formats']
2455
e0493e90 2456 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2457 if not self.params.get('allow_unplayable_formats'):
2458 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2459
319b6059 2460 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2461 if not get_from_start:
2462 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2463 if info_dict.get('is_live') and formats:
adbc4ec4 2464 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2465 if get_from_start and not formats:
2466 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2467 'If you want to download from the current time, pass --no-live-from-start')
adbc4ec4 2468
db95dc13 2469 if not formats:
1151c407 2470 self.raise_no_formats(info_dict)
db95dc13 2471
73af5cc8
S
2472 def is_wellformed(f):
2473 url = f.get('url')
a5ac0c47 2474 if not url:
73af5cc8
S
2475 self.report_warning(
2476 '"url" field is missing or empty - skipping format, '
2477 'there is an error in extractor')
a5ac0c47
S
2478 return False
2479 if isinstance(url, bytes):
2480 sanitize_string_field(f, 'url')
2481 return True
73af5cc8
S
2482
2483 # Filter out malformed formats for better extraction robustness
2484 formats = list(filter(is_wellformed, formats))
2485
181c7053
S
2486 formats_dict = {}
2487
dd82ffea 2488 # We check that all the formats have the format and format_id fields
db95dc13 2489 for i, format in enumerate(formats):
c9969434
S
2490 sanitize_string_field(format, 'format_id')
2491 sanitize_numeric_fields(format)
dcf77cf1 2492 format['url'] = sanitize_url(format['url'])
e74e3b63 2493 if not format.get('format_id'):
8016c922 2494 format['format_id'] = compat_str(i)
e2effb08
S
2495 else:
2496 # Sanitize format_id from characters used in format selector expression
ec85ded8 2497 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2498 format_id = format['format_id']
2499 if format_id not in formats_dict:
2500 formats_dict[format_id] = []
2501 formats_dict[format_id].append(format)
2502
2503 # Make sure all formats have unique format_id
03b4de72 2504 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2505 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2506 ambiguous_id = len(ambiguous_formats) > 1
2507 for i, format in enumerate(ambiguous_formats):
2508 if ambiguous_id:
181c7053 2509 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2510 if format.get('ext') is None:
2511 format['ext'] = determine_ext(format['url']).lower()
2512 # Ensure there is no conflict between id and ext in format selection
2513 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2514 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2515 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2516
2517 for i, format in enumerate(formats):
8c51aa65 2518 if format.get('format') is None:
6febd1c1 2519 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2520 id=format['format_id'],
2521 res=self.format_resolution(format),
b868936c 2522 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2523 )
6f0be937 2524 if format.get('protocol') is None:
b5559424 2525 format['protocol'] = determine_protocol(format)
239df021 2526 if format.get('resolution') is None:
2527 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2528 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2529 format['dynamic_range'] = 'SDR'
f2fe69c7 2530 if (info_dict.get('duration') and format.get('tbr')
2531 and not format.get('filesize') and not format.get('filesize_approx')):
2532 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
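# Illustrative sketch: tbr is in KBit/s, so a 60 s stream at tbr=1000 gives
# 60 * 1000 * 128 = 7,680,000 bytes (about 7.7 MB) as filesize_approx.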
2533
e5660ee6
JMF
2534 # Add HTTP headers, so that external programs can use them from the
2535 # json output
2536 full_format_info = info_dict.copy()
2537 full_format_info.update(format)
2538 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2539 # Remove private housekeeping stuff
2540 if '__x_forwarded_for_ip' in info_dict:
2541 del info_dict['__x_forwarded_for_ip']
dd82ffea 2542
9f1a1c36 2543 if self.params.get('check_formats') is True:
282f5709 2544 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2545
88acdbc2 2546 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2547 # only set the 'formats' field if the original info_dict lists them;
2548 # otherwise we end up with a circular reference: the first (and only)
f89197d7 2549 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2550 # which can't be exported to json
b3d9ef88 2551 info_dict['formats'] = formats
4ec82a72 2552
2553 info_dict, _ = self.pre_process(info_dict)
2554
6db9c4d5 2555 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2556 return info_dict
2557
2558 self.post_extract(info_dict)
2559 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2560
093a1710 2561 # The pre-processors may have modified the formats
2562 formats = info_dict.get('formats', [info_dict])
2563
fa9f30b8 2564 list_only = self.params.get('simulate') is None and (
2565 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2566 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2567 if self.params.get('list_thumbnails'):
2568 self.list_thumbnails(info_dict)
b7b04c78 2569 if self.params.get('listsubtitles'):
2570 if 'automatic_captions' in info_dict:
2571 self.list_subtitles(
2572 info_dict['id'], automatic_captions, 'automatic captions')
2573 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2574 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2575 self.list_formats(info_dict)
169dbde9 2576 if list_only:
b7b04c78 2577 # Without this printing, -F --print-json will not work
169dbde9 2578 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2579 return
2580
187986a8 2581 format_selector = self.format_selector
2582 if format_selector is None:
0017d9ad 2583 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2584 self.write_debug('Default format spec: %s' % req_format)
187986a8 2585 format_selector = self.build_format_selector(req_format)
317f7ab6 2586
fa9f30b8 2587 while True:
2588 if interactive_format_selection:
2589 req_format = input(
2590 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2591 try:
2592 format_selector = self.build_format_selector(req_format)
2593 except SyntaxError as err:
2594 self.report_error(err, tb=False, is_error=False)
2595 continue
2596
2597 # During format selection we may need access to the original
2598 # format set in order to calculate some metrics or do some processing.
2599 # For now we need to be able to guess whether the original formats provided
2600 # by the extractor are incomplete (i.e. whether the extractor provides only
2601 # video-only or audio-only formats) so that formats can be selected properly
2602 # for extractors with such incomplete formats (see
2603 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2604 # Since formats may be filtered during format selection and may not match
2605 # the original formats, the results may be incorrect. Thus the original formats
2606 # or pre-calculated metrics should be passed to the format selection routines
2607 # as well.
2608 # We therefore pass a context object containing all necessary additional data
2609 # instead of just formats.
2610 # This fixes the incorrect format selection issue (see
2611 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2612 incomplete_formats = (
2613 # All formats are video-only or
2614 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2615 # all formats are audio-only
2616 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2617
2618 ctx = {
2619 'formats': formats,
2620 'incomplete_formats': incomplete_formats,
2621 }
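# E.g. if an extractor returns only audio-only formats, incomplete_formats is
# True, which (in the format selector) lets a spec like 'best' fall back to the
# best available single-stream format instead of matching nothing.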
2622
2623 formats_to_download = list(format_selector(ctx))
2624 if interactive_format_selection and not formats_to_download:
2625 self.report_error('Requested format is not available', tb=False, is_error=False)
2626 continue
2627 break
317f7ab6 2628
dd82ffea 2629 if not formats_to_download:
b7da73eb 2630 if not self.params.get('ignore_no_formats_error'):
1151c407 2631 raise ExtractorError('Requested format is not available', expected=True,
2632 video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2633 self.report_warning('Requested format is not available')
2634 # Process what we can, even without any available formats.
2635 formats_to_download = [{}]
a13e6848 2636
b62fa6d7 2637 best_format = formats_to_download[-1]
2638 if download:
2639 if best_format:
2640 self.to_screen(
2641 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2642 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2643 max_downloads_reached = False
f46e2f9d 2644 for i, fmt in enumerate(formats_to_download):
09b49e1f 2645 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2646 new_info.update(fmt)
a13e6848 2647 try:
2648 self.process_info(new_info)
2649 except MaxDownloadsReached:
2650 max_downloads_reached = True
f46e2f9d 2651 # Remove copied info
2652 for key, val in tuple(new_info.items()):
2653 if info_dict.get(key) == val:
2654 new_info.pop(key)
a13e6848 2655 if max_downloads_reached:
2656 break
ebed8b37 2657
9e907ebd 2658 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
a13e6848 2659 assert write_archive.issubset({True, False, 'ignore'})
2660 if True in write_archive and False not in write_archive:
2661 self.record_download_archive(info_dict)
be72c624 2662
2663 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2664 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2665 if max_downloads_reached:
2666 raise MaxDownloadsReached()
ebed8b37 2667
49a57e70 2668 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2669 info_dict.update(best_format)
dd82ffea
JMF
2670 return info_dict
2671
98c70d6f 2672 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2673 """Select the requested subtitles and their format"""
98c70d6f
JMF
2674 available_subs = {}
2675 if normal_subtitles and self.params.get('writesubtitles'):
2676 available_subs.update(normal_subtitles)
2677 if automatic_captions and self.params.get('writeautomaticsub'):
2678 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2679 if lang not in available_subs:
2680 available_subs[lang] = cap_info
2681
4d171848
JMF
2682 if (not self.params.get('writesubtitles') and not
2683 self.params.get('writeautomaticsub') or not
2684 available_subs):
2685 return None
a504ced0 2686
c32b0aab 2687 all_sub_langs = available_subs.keys()
a504ced0 2688 if self.params.get('allsubtitles', False):
c32b0aab 2689 requested_langs = all_sub_langs
2690 elif self.params.get('subtitleslangs', False):
77c4a9ef 2691 # A list is used so that the order of languages will be the same as
2692 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2693 requested_langs = []
2694 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2695 discard = lang_re[0] == '-'
c32b0aab 2696 if discard:
77c4a9ef 2697 lang_re = lang_re[1:]
3aa91540 2698 if lang_re == 'all':
2699 if discard:
2700 requested_langs = []
2701 else:
2702 requested_langs.extend(all_sub_langs)
2703 continue
77c4a9ef 2704 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2705 if discard:
2706 for lang in current_langs:
77c4a9ef 2707 while lang in requested_langs:
2708 requested_langs.remove(lang)
c32b0aab 2709 else:
77c4a9ef 2710 requested_langs.extend(current_langs)
2711 requested_langs = orderedSet(requested_langs)
c32b0aab 2712 elif 'en' in available_subs:
2713 requested_langs = ['en']
a504ced0 2714 else:
c32b0aab 2715 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2716 if requested_langs:
2717 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
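# Illustration of the matching above (hypothetical input): with
# subtitleslangs=['all', '-live_chat'], every available language is added and
# 'live_chat' is then discarded; a plain entry such as 'en.*' is treated as a
# regex anchored at both ends (a '$' is appended before matching).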
a504ced0
JMF
2718
2719 formats_query = self.params.get('subtitlesformat', 'best')
2720 formats_preference = formats_query.split('/') if formats_query else []
2721 subs = {}
2722 for lang in requested_langs:
2723 formats = available_subs.get(lang)
2724 if formats is None:
2725 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2726 continue
a504ced0
JMF
2727 for ext in formats_preference:
2728 if ext == 'best':
2729 f = formats[-1]
2730 break
2731 matches = list(filter(lambda f: f['ext'] == ext, formats))
2732 if matches:
2733 f = matches[-1]
2734 break
2735 else:
2736 f = formats[-1]
2737 self.report_warning(
2738 'No subtitle format found matching "%s" for language %s, '
2739 'using %s' % (formats_query, lang, f['ext']))
2740 subs[lang] = f
2741 return subs
2742
bb66c247 2743 def _forceprint(self, key, info_dict):
2744 if info_dict is None:
2745 return
2746 info_copy = info_dict.copy()
2747 info_copy['formats_table'] = self.render_formats_table(info_dict)
2748 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2749 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2750 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2751
2752 def format_tmpl(tmpl):
2753 mobj = re.match(r'\w+(=?)$', tmpl)
2754 if mobj and mobj.group(1):
2755 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2756 elif mobj:
2757 return f'%({tmpl})s'
2758 return tmpl
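# format_tmpl turns a bare field name into an output template: e.g. 'title'
# becomes '%(title)s' and 'title=' becomes "title = %(title)r"; anything more
# complex is assumed to already be a template and is left as-is.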
8130779d 2759
bb66c247 2760 for tmpl in self.params['forceprint'].get(key, []):
2761 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2762
2763 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2764 filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2765 tmpl = format_tmpl(tmpl)
2766 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2767 if self._ensure_dir_exists(filename):
2768 with io.open(filename, 'a', encoding='utf-8') as f:
2769 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2770
d06daf23 2771 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2772 def print_mandatory(field, actual_field=None):
2773 if actual_field is None:
2774 actual_field = field
d06daf23 2775 if (self.params.get('force%s' % field, False)
53c18592 2776 and (not incomplete or info_dict.get(actual_field) is not None)):
2777 self.to_stdout(info_dict[actual_field])
d06daf23
S
2778
2779 def print_optional(field):
2780 if (self.params.get('force%s' % field, False)
2781 and info_dict.get(field) is not None):
2782 self.to_stdout(info_dict[field])
2783
53c18592 2784 info_dict = info_dict.copy()
2785 if filename is not None:
2786 info_dict['filename'] = filename
2787 if info_dict.get('requested_formats') is not None:
2788 # For RTMP URLs, also include the playpath
2789 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2790 elif info_dict.get('url'):
53c18592 2791 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2792
bb66c247 2793 if (self.params.get('forcejson')
2794 or self.params['forceprint'].get('video')
2795 or self.params['print_to_file'].get('video')):
2b8a2973 2796 self.post_extract(info_dict)
bb66c247 2797 self._forceprint('video', info_dict)
53c18592 2798
d06daf23
S
2799 print_mandatory('title')
2800 print_mandatory('id')
53c18592 2801 print_mandatory('url', 'urls')
d06daf23
S
2802 print_optional('thumbnail')
2803 print_optional('description')
53c18592 2804 print_optional('filename')
b868936c 2805 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2806 self.to_stdout(formatSeconds(info_dict['duration']))
2807 print_mandatory('format')
53c18592 2808
2b8a2973 2809 if self.params.get('forcejson'):
6e84b215 2810 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2811
e8e73840 2812 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2813 if not info.get('url'):
1151c407 2814 self.raise_no_formats(info, True)
e8e73840 2815
2816 if test:
2817 verbose = self.params.get('verbose')
2818 params = {
2819 'test': True,
a169858f 2820 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2821 'verbose': verbose,
2822 'noprogress': not verbose,
2823 'nopart': True,
2824 'skip_unavailable_fragments': False,
2825 'keep_fragments': False,
2826 'overwrites': True,
2827 '_no_ytdl_file': True,
2828 }
2829 else:
2830 params = self.params
96fccc10 2831 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2832 if not test:
2833 for ph in self._progress_hooks:
2834 fd.add_progress_hook(ph)
42676437
M
2835 urls = '", "'.join(
2836 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2837 for f in info.get('requested_formats', []) or [info])
18e674b4 2838 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2839
adbc4ec4
THD
2840 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2841 # But it may contain objects that are not deep-copyable
2842 new_info = self._copy_infodict(info)
e8e73840 2843 if new_info.get('http_headers') is None:
2844 new_info['http_headers'] = self._calc_headers(new_info)
2845 return fd.download(name, new_info, subtitle)
2846
e04938ab 2847 def existing_file(self, filepaths, *, default_overwrite=True):
2848 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2849 if existing_files and not self.params.get('overwrites', default_overwrite):
2850 return existing_files[0]
2851
2852 for file in existing_files:
2853 self.report_file_delete(file)
2854 os.remove(file)
2855 return None
2856
8222d8de 2857 def process_info(self, info_dict):
09b49e1f 2858 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2859
2860 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2861 original_infodict = info_dict
fd288278 2862
4513a41a 2863 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2864 info_dict['format'] = info_dict['ext']
2865
09b49e1f 2866 # This is mostly just for backward compatibility of process_info
2867 # As a side-effect, this allows for format-specific filters
c77495e3 2868 if self._match_entry(info_dict) is not None:
9e907ebd 2869 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2870 return
2871
09b49e1f 2872 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2873 self.post_extract(info_dict)
0c14d66a 2874 self._num_downloads += 1
8222d8de 2875
dcf64d43 2876 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2877 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2878 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2879 files_to_move = {}
8222d8de
JMF
2880
2881 # Forced printings
4513a41a 2882 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2883
b7b04c78 2884 if self.params.get('simulate'):
9e907ebd 2885 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
8222d8de
JMF
2886 return
2887
de6000d9 2888 if full_filename is None:
8222d8de 2889 return
e92caff5 2890 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2891 return
e92caff5 2892 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2893 return
2894
80c03fa9 2895 if self._write_description('video', info_dict,
2896 self.prepare_filename(info_dict, 'description')) is None:
2897 return
2898
2899 sub_files = self._write_subtitles(info_dict, temp_filename)
2900 if sub_files is None:
2901 return
2902 files_to_move.update(dict(sub_files))
2903
2904 thumb_files = self._write_thumbnails(
2905 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2906 if thumb_files is None:
2907 return
2908 files_to_move.update(dict(thumb_files))
8222d8de 2909
80c03fa9 2910 infofn = self.prepare_filename(info_dict, 'infojson')
2911 _infojson_written = self._write_info_json('video', info_dict, infofn)
2912 if _infojson_written:
dac5df5a 2913 info_dict['infojson_filename'] = infofn
e75bb0d6 2914 # For backward compatibility, even though it was a private field
80c03fa9 2915 info_dict['__infojson_filename'] = infofn
2916 elif _infojson_written is None:
2917 return
2918
2919 # Note: Annotations are deprecated
2920 annofn = None
1fb07d10 2921 if self.params.get('writeannotations', False):
de6000d9 2922 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2923 if annofn:
e92caff5 2924 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2925 return
0c3d0f51 2926 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2927 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2928 elif not info_dict.get('annotations'):
2929 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2930 else:
2931 try:
6febd1c1 2932 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2933 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2934 annofile.write(info_dict['annotations'])
2935 except (KeyError, TypeError):
6febd1c1 2936 self.report_warning('There are no annotations to write.')
7b6fefc9 2937 except (OSError, IOError):
6febd1c1 2938 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2939 return
1fb07d10 2940
732044af 2941 # Write internet shortcut files
08438d2c 2942 def _write_link_file(link_type):
60f3e995 2943 url = try_get(info_dict['webpage_url'], iri_to_uri)
2944 if not url:
2945 self.report_warning(
2946 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2947 return True
08438d2c 2948 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2949 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2950 return False
10e3742e 2951 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2952 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2953 return True
2954 try:
2955 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2956 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2957 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2958 template_vars = {'url': url}
08438d2c 2959 if link_type == 'desktop':
2960 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2961 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2962 except (OSError, IOError):
2963 self.report_error(f'Cannot write internet shortcut {linkfn}')
2964 return False
732044af 2965 return True
2966
08438d2c 2967 write_links = {
2968 'url': self.params.get('writeurllink'),
2969 'webloc': self.params.get('writewebloclink'),
2970 'desktop': self.params.get('writedesktoplink'),
2971 }
2972 if self.params.get('writelink'):
2973 link_type = ('webloc' if sys.platform == 'darwin'
2974 else 'desktop' if sys.platform.startswith('linux')
2975 else 'url')
2976 write_links[link_type] = True
2977
2978 if any(should_write and not _write_link_file(link_type)
2979 for link_type, should_write in write_links.items()):
2980 return
732044af 2981
f46e2f9d 2982 def replace_info_dict(new_info):
2983 nonlocal info_dict
2984 if new_info == info_dict:
2985 return
2986 info_dict.clear()
2987 info_dict.update(new_info)
2988
56d868db 2989 try:
f46e2f9d 2990 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2991 replace_info_dict(new_info)
56d868db 2992 except PostProcessingError as err:
2993 self.report_error('Preprocessing: %s' % str(err))
2994 return
2995
a13e6848 2996 if self.params.get('skip_download'):
56d868db 2997 info_dict['filepath'] = temp_filename
2998 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2999 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3000 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3001 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3002 else:
3003 # Download
b868936c 3004 info_dict.setdefault('__postprocessors', [])
4340deca 3005 try:
0202b52a 3006
e04938ab 3007 def existing_video_file(*filepaths):
6b591b29 3008 ext = info_dict.get('ext')
e04938ab 3009 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3010 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3011 default_overwrite=False)
3012 if file:
3013 info_dict['ext'] = os.path.splitext(file)[1][1:]
3014 return file
0202b52a 3015
3016 success = True
4340deca 3017 if info_dict.get('requested_formats') is not None:
81cd954a
S
3018
3019 def compatible_formats(formats):
d03cfdce 3020 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3021 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3022 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3023 if len(video_formats) > 2 or len(audio_formats) > 2:
3024 return False
3025
81cd954a 3026 # Check extension
d03cfdce 3027 exts = set(format.get('ext') for format in formats)
3028 COMPATIBLE_EXTS = (
3029 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3030 set(('webm',)),
3031 )
3032 for ext_sets in COMPATIBLE_EXTS:
3033 if ext_sets.issuperset(exts):
3034 return True
81cd954a
S
3035 # TODO: Check acodec/vcodec
3036 return False
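# E.g. an mp4 video-only stream plus an m4a audio-only stream count as
# compatible (both belong to the first extension set above), while mp4 + webm
# do not, in which case the merge container falls back to mkv below.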
3037
3038 requested_formats = info_dict['requested_formats']
0202b52a 3039 old_ext = info_dict['ext']
4e3b637d 3040 if self.params.get('merge_output_format') is None:
3041 if not compatible_formats(requested_formats):
3042 info_dict['ext'] = 'mkv'
3043 self.report_warning(
3044 'Requested formats are incompatible for merge and will be merged into mkv')
3045 if (info_dict['ext'] == 'webm'
3046 and info_dict.get('thumbnails')
3047 # check with type instead of pp_key, __name__, or isinstance
3048 # since we don't want any custom PPs to trigger this
3049 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3050 info_dict['ext'] = 'mkv'
3051 self.report_warning(
3052 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3053 new_ext = info_dict['ext']
0202b52a 3054
124bc071 3055 def correct_ext(filename, ext=new_ext):
96fccc10 3056 if filename == '-':
3057 return filename
0202b52a 3058 filename_real_ext = os.path.splitext(filename)[1][1:]
3059 filename_wo_ext = (
3060 os.path.splitext(filename)[0]
124bc071 3061 if filename_real_ext in (old_ext, new_ext)
0202b52a 3062 else filename)
124bc071 3063 return '%s.%s' % (filename_wo_ext, ext)
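# E.g. with old_ext='webm' and new_ext='mkv' (hypothetical values), correct_ext
# turns 'video.webm' into 'video.mkv'; a filename of '-' (stdout) is left as-is.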
0202b52a 3064
38c6902b 3065 # Ensure filename always has a correct extension for successful merge
0202b52a 3066 full_filename = correct_ext(full_filename)
3067 temp_filename = correct_ext(temp_filename)
e04938ab 3068 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3069 info_dict['__real_download'] = False
18e674b4 3070
adbc4ec4
THD
3071 downloaded = []
3072 merger = FFmpegMergerPP(self)
3073
3074 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 3075 if dl_filename is not None:
6c7274ec 3076 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3077 elif fd:
3078 for f in requested_formats if fd != FFmpegFD else []:
3079 f['filepath'] = fname = prepend_extension(
3080 correct_ext(temp_filename, info_dict['ext']),
3081 'f%s' % f['format_id'], info_dict['ext'])
3082 downloaded.append(fname)
dbf5416a 3083 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3084 success, real_download = self.dl(temp_filename, info_dict)
3085 info_dict['__real_download'] = real_download
18e674b4 3086 else:
18e674b4 3087 if self.params.get('allow_unplayable_formats'):
3088 self.report_warning(
3089 'You have requested merging of multiple formats '
3090 'while also allowing unplayable formats to be downloaded. '
3091 'The formats won\'t be merged to prevent data corruption.')
3092 elif not merger.available:
e8969bda 3093 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3094 if not self.params.get('ignoreerrors'):
3095 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3096 return
3097 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3098
96fccc10 3099 if temp_filename == '-':
adbc4ec4 3100 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3101 else 'but the formats are incompatible for simultaneous download' if merger.available
3102 else 'but ffmpeg is not installed')
3103 self.report_warning(
3104 f'You have requested downloading multiple formats to stdout {reason}. '
3105 'The formats will be streamed one after the other')
3106 fname = temp_filename
dbf5416a 3107 for f in requested_formats:
3108 new_info = dict(info_dict)
3109 del new_info['requested_formats']
3110 new_info.update(f)
96fccc10 3111 if temp_filename != '-':
124bc071 3112 fname = prepend_extension(
3113 correct_ext(temp_filename, new_info['ext']),
3114 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3115 if not self._ensure_dir_exists(fname):
3116 return
a21e0ab1 3117 f['filepath'] = fname
96fccc10 3118 downloaded.append(fname)
dbf5416a 3119 partial_success, real_download = self.dl(fname, new_info)
3120 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3121 success = success and partial_success
adbc4ec4
THD
3122
3123 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3124 info_dict['__postprocessors'].append(merger)
3125 info_dict['__files_to_merge'] = downloaded
3126 # Even if there were no new downloads, the merge itself only happens now
3127 info_dict['__real_download'] = True
3128 else:
3129 for file in downloaded:
3130 files_to_move[file] = None
4340deca
P
3131 else:
3132 # Just a single file
e04938ab 3133 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3134 if dl_filename is None or dl_filename == temp_filename:
3135 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3136 # So we should try to resume the download
e8e73840 3137 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3138 info_dict['__real_download'] = real_download
6c7274ec 3139 else:
3140 self.report_file_already_downloaded(dl_filename)
0202b52a 3141
0202b52a 3142 dl_filename = dl_filename or temp_filename
c571435f 3143 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3144
3158150c 3145 except network_exceptions as err:
7960b056 3146 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
3147 return
3148 except (OSError, IOError) as err:
3149 raise UnavailableVideoError(err)
3150 except (ContentTooShortError, ) as err:
3151 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3152 return
8222d8de 3153
de6000d9 3154 if success and full_filename != '-':
f17f8651 3155
fd7cfb64 3156 def fixup():
3157 do_fixup = True
3158 fixup_policy = self.params.get('fixup')
3159 vid = info_dict['id']
3160
3161 if fixup_policy in ('ignore', 'never'):
3162 return
3163 elif fixup_policy == 'warn':
3164 do_fixup = False
f89b3e2d 3165 elif fixup_policy != 'force':
3166 assert fixup_policy in ('detect_or_warn', None)
3167 if not info_dict.get('__real_download'):
3168 do_fixup = False
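# At this point: 'ignore'/'never' skip fixups entirely, 'warn' only warns,
# 'force' always applies them, and 'detect_or_warn' (or an unset policy) only
# fixes files that were actually downloaded during this run.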
fd7cfb64 3169
3170 def ffmpeg_fixup(cndn, msg, cls):
3171 if not cndn:
3172 return
3173 if not do_fixup:
3174 self.report_warning(f'{vid}: {msg}')
3175 return
3176 pp = cls(self)
3177 if pp.available:
3178 info_dict['__postprocessors'].append(pp)
3179 else:
3180 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3181
3182 stretched_ratio = info_dict.get('stretched_ratio')
3183 ffmpeg_fixup(
3184 stretched_ratio not in (1, None),
3185 f'Non-uniform pixel ratio {stretched_ratio}',
3186 FFmpegFixupStretchedPP)
3187
3188 ffmpeg_fixup(
3189 (info_dict.get('requested_formats') is None
3190 and info_dict.get('container') == 'm4a_dash'
3191 and info_dict.get('ext') == 'm4a'),
3192 'writing DASH m4a. Only some players support this container',
3193 FFmpegFixupM4aPP)
3194
993191c0 3195 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3196 downloader = downloader.__name__ if downloader else None
adbc4ec4
THD
3197
3198 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3199 ffmpeg_fixup(downloader == 'HlsFD',
3200 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3201 FFmpegFixupM3u8PP)
3202 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3203 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3204
e04b003e 3205 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3206 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3207
3208 fixup()
8222d8de 3209 try:
f46e2f9d 3210 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3211 except PostProcessingError as err:
3212 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3213 return
ab8e5e51
AM
3214 try:
3215 for ph in self._post_hooks:
23c1a667 3216 ph(info_dict['filepath'])
ab8e5e51
AM
3217 except Exception as err:
3218 self.report_error('post hooks: %s' % str(err))
3219 return
9e907ebd 3220 info_dict['__write_download_archive'] = True
2d30509f 3221
a13e6848 3222 if self.params.get('force_write_download_archive'):
9e907ebd 3223 info_dict['__write_download_archive'] = True
a13e6848 3224
3225 # Make sure the info_dict was modified in-place
f46e2f9d 3226 assert info_dict is original_infodict
a13e6848 3227
c3e6ffba 3228 max_downloads = self.params.get('max_downloads')
3229 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3230 raise MaxDownloadsReached()
8222d8de 3231
aa9369a2 3232 def __download_wrapper(self, func):
3233 @functools.wraps(func)
3234 def wrapper(*args, **kwargs):
3235 try:
3236 res = func(*args, **kwargs)
3237 except UnavailableVideoError as e:
3238 self.report_error(e)
b222c271 3239 except MaxDownloadsReached as e:
aa9369a2 3240 self.to_screen(f'[info] {e}')
3241 raise
b222c271 3242 except DownloadCancelled as e:
3243 self.to_screen(f'[info] {e}')
3244 if not self.params.get('break_per_url'):
3245 raise
aa9369a2 3246 else:
3247 if self.params.get('dump_single_json', False):
3248 self.post_extract(res)
3249 self.to_stdout(json.dumps(self.sanitize_info(res)))
3250 return wrapper
3251
8222d8de
JMF
3252 def download(self, url_list):
3253 """Download a given list of URLs."""
aa9369a2 3254 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3255 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3256 if (len(url_list) > 1
3257 and outtmpl != '-'
3258 and '%' not in outtmpl
3259 and self.params.get('max_downloads') != 1):
acd69589 3260 raise SameFileError(outtmpl)
8222d8de
JMF
3261
3262 for url in url_list:
aa9369a2 3263 self.__download_wrapper(self.extract_info)(
3264 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3265
3266 return self._download_retcode
3267
1dcc4c0c 3268 def download_with_info_file(self, info_filename):
31bd3925
JMF
3269 with contextlib.closing(fileinput.FileInput(
3270 [info_filename], mode='r',
3271 openhook=fileinput.hook_encoded('utf-8'))) as f:
2872 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3273 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3274 try:
aa9369a2 3275 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3276 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3277 if not isinstance(e, EntryNotInPlaylist):
3278 self.to_stderr('\r')
d4943898
JMF
3279 webpage_url = info.get('webpage_url')
3280 if webpage_url is not None:
aa9369a2 3281 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3282 return self.download([webpage_url])
3283 else:
3284 raise
3285 return self._download_retcode
1dcc4c0c 3286
cb202fd2 3287 @staticmethod
8012d892 3288 def sanitize_info(info_dict, remove_private_keys=False):
3289 ''' Sanitize the infodict for converting to json '''
3ad56b42 3290 if info_dict is None:
3291 return info_dict
6e84b215 3292 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3293 info_dict.setdefault('_type', 'video')
09b49e1f 3294
8012d892 3295 if remove_private_keys:
09b49e1f 3296 reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
f46e2f9d 3297 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3298 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3299 }
ae8f99e6 3300 else:
09b49e1f 3301 reject = lambda k, v: False
adbc4ec4
THD
3302
3303 def filter_fn(obj):
3304 if isinstance(obj, dict):
3305 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3306 elif isinstance(obj, (list, tuple, set, LazyList)):
3307 return list(map(filter_fn, obj))
3308 elif obj is None or isinstance(obj, (str, int, float, bool)):
3309 return obj
3310 else:
3311 return repr(obj)
3312
5226731e 3313 return filter_fn(info_dict)
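# E.g. with remove_private_keys=True, fields such as 'requested_formats' and
# any key starting with '_' (except '_type') are dropped, None values are
# removed, and objects that aren't JSON-serializable are replaced by their repr().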
cb202fd2 3314
8012d892 3315 @staticmethod
3316 def filter_requested_info(info_dict, actually_filter=True):
3317 ''' Alias of sanitize_info for backward compatibility '''
3318 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3319
ed5835b4 3320 @staticmethod
3321 def post_extract(info_dict):
3322 def actual_post_extract(info_dict):
3323 if info_dict.get('_type') in ('playlist', 'multi_video'):
3324 for video_dict in info_dict.get('entries', {}):
3325 actual_post_extract(video_dict or {})
3326 return
3327
09b49e1f 3328 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3329 info_dict.update(post_extractor())
ed5835b4 3330
3331 actual_post_extract(info_dict or {})
3332
dcf64d43 3333 def run_pp(self, pp, infodict):
5bfa4862 3334 files_to_delete = []
dcf64d43 3335 if '__files_to_move' not in infodict:
3336 infodict['__files_to_move'] = {}
b1940459 3337 try:
3338 files_to_delete, infodict = pp.run(infodict)
3339 except PostProcessingError as e:
3340 # Must be True and not 'only_download'
3341 if self.params.get('ignoreerrors') is True:
3342 self.report_error(e)
3343 return infodict
3344 raise
3345
5bfa4862 3346 if not files_to_delete:
dcf64d43 3347 return infodict
5bfa4862 3348 if self.params.get('keepvideo', False):
3349 for f in files_to_delete:
dcf64d43 3350 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3351 else:
3352 for old_filename in set(files_to_delete):
3353 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3354 try:
3355 os.remove(encodeFilename(old_filename))
3356 except (IOError, OSError):
3357 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3358 if old_filename in infodict['__files_to_move']:
3359 del infodict['__files_to_move'][old_filename]
3360 return infodict
5bfa4862 3361
ed5835b4 3362 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3363 self._forceprint(key, info)
ed5835b4 3364 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3365 info = self.run_pp(pp, info)
ed5835b4 3366 return info
277d6ff5 3367
56d868db 3368 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3369 info = dict(ie_info)
56d868db 3370 info['__files_to_move'] = files_to_move or {}
ed5835b4 3371 info = self.run_all_pps(key, info)
56d868db 3372 return info, info.pop('__files_to_move', None)
5bfa4862 3373
f46e2f9d 3374 def post_process(self, filename, info, files_to_move=None):
8222d8de 3375 """Run all the postprocessors on the given file."""
8222d8de 3376 info['filepath'] = filename
dcf64d43 3377 info['__files_to_move'] = files_to_move or {}
ed5835b4 3378 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3379 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3380 del info['__files_to_move']
ed5835b4 3381 return self.run_all_pps('after_move', info)
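# Post-processor stages run in this order for a single video:
# 'pre_process' -> 'after_filter' -> 'before_dl' -> (download) ->
# 'post_process' -> 'after_move', with 'after_video' running once all
# requested formats of the video have been processed.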
c1c9a79c 3382
5db07df6 3383 def _make_archive_id(self, info_dict):
e9fef7ee
S
3384 video_id = info_dict.get('id')
3385 if not video_id:
3386 return
5db07df6
PH
3387 # Future-proof against any change in the case of the extractor key
3388 # and for backwards compatibility with prior versions
e9fef7ee 3389 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3390 if extractor is None:
1211bb6d
S
3391 url = str_or_none(info_dict.get('url'))
3392 if not url:
3393 return
e9fef7ee 3394 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3395 for ie_key, ie in self._ies.items():
1211bb6d 3396 if ie.suitable(url):
8b7491c8 3397 extractor = ie_key
e9fef7ee
S
3398 break
3399 else:
3400 return
d0757229 3401 return '%s %s' % (extractor.lower(), video_id)
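# E.g. (hypothetical id) a YouTube video with id 'abc123' is recorded in the
# download archive as the line 'youtube abc123'.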
5db07df6
PH
3402
3403 def in_download_archive(self, info_dict):
3404 fn = self.params.get('download_archive')
3405 if fn is None:
3406 return False
3407
3408 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3409 if not vid_id:
7012b23c 3410 return False # Incomplete video information
5db07df6 3411
a45e8619 3412 return vid_id in self.archive
c1c9a79c
PH
3413
3414 def record_download_archive(self, info_dict):
3415 fn = self.params.get('download_archive')
3416 if fn is None:
3417 return
5db07df6
PH
3418 vid_id = self._make_archive_id(info_dict)
3419 assert vid_id
a13e6848 3420 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3421 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3422 archive_file.write(vid_id + '\n')
a45e8619 3423 self.archive.add(vid_id)
dd82ffea 3424
8c51aa65 3425 @staticmethod
8abeeb94 3426 def format_resolution(format, default='unknown'):
9359f3d4 3427 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3428 return 'audio only'
f49d89ee
PH
3429 if format.get('resolution') is not None:
3430 return format['resolution']
35615307 3431 if format.get('width') and format.get('height'):
ff51ed58 3432 return '%dx%d' % (format['width'], format['height'])
35615307 3433 elif format.get('height'):
ff51ed58 3434 return '%sp' % format['height']
35615307 3435 elif format.get('width'):
ff51ed58 3436 return '%dx?' % format['width']
3437 return default
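# E.g. width=1920/height=1080 renders as '1920x1080', height=720 alone as
# '720p', an explicit 'resolution' field is used verbatim, and audio-only
# formats render as 'audio only'.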
8c51aa65 3438
8130779d 3439 def _list_format_headers(self, *headers):
3440 if self.params.get('listformats_table', True) is not False:
3441 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3442 return headers
3443
c57f7757
PH
3444 def _format_note(self, fdict):
3445 res = ''
3446 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3447 res += '(unsupported)'
32f90364
PH
3448 if fdict.get('language'):
3449 if res:
3450 res += ' '
f304da8a 3451 res += '[%s]' % fdict['language']
c57f7757 3452 if fdict.get('format_note') is not None:
f304da8a 3453 if res:
3454 res += ' '
3455 res += fdict['format_note']
c57f7757 3456 if fdict.get('tbr') is not None:
f304da8a 3457 if res:
3458 res += ', '
3459 res += '%4dk' % fdict['tbr']
c57f7757
PH
3460 if fdict.get('container') is not None:
3461 if res:
3462 res += ', '
3463 res += '%s container' % fdict['container']
3089bc74
S
3464 if (fdict.get('vcodec') is not None
3465 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3466 if res:
3467 res += ', '
3468 res += fdict['vcodec']
91c7271a 3469 if fdict.get('vbr') is not None:
c57f7757
PH
3470 res += '@'
3471 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3472 res += 'video@'
3473 if fdict.get('vbr') is not None:
3474 res += '%4dk' % fdict['vbr']
fbb21cf5 3475 if fdict.get('fps') is not None:
5d583bdf
S
3476 if res:
3477 res += ', '
3478 res += '%sfps' % fdict['fps']
c57f7757
PH
3479 if fdict.get('acodec') is not None:
3480 if res:
3481 res += ', '
3482 if fdict['acodec'] == 'none':
3483 res += 'video only'
3484 else:
3485 res += '%-5s' % fdict['acodec']
3486 elif fdict.get('abr') is not None:
3487 if res:
3488 res += ', '
3489 res += 'audio'
3490 if fdict.get('abr') is not None:
3491 res += '@%3dk' % fdict['abr']
3492 if fdict.get('asr') is not None:
3493 res += ' (%5dHz)' % fdict['asr']
3494 if fdict.get('filesize') is not None:
3495 if res:
3496 res += ', '
3497 res += format_bytes(fdict['filesize'])
9732d77e
PH
3498 elif fdict.get('filesize_approx') is not None:
3499 if res:
3500 res += ', '
3501 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3502 return res
91c7271a 3503
8130779d 3504 def render_formats_table(self, info_dict):
b69fd25c 3505 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3506 return None
b69fd25c 3507
94badb25 3508 formats = info_dict.get('formats', [info_dict])
8130779d 3509 if self.params.get('listformats_table', True) is False:
76d321f6 3510 table = [
3511 [
3512 format_field(f, 'format_id'),
3513 format_field(f, 'ext'),
3514 self.format_resolution(f),
8130779d 3515 self._format_note(f)
3516 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3517 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3518
3519 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3520 table = [
3521 [
3522 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3523 format_field(f, 'ext'),
3524 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3525 format_field(f, 'fps', '\t%d'),
3526 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3527 delim,
3528 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3529 format_field(f, 'tbr', '\t%dk'),
3530 shorten_protocol_name(f.get('protocol', '')),
3531 delim,
3532 format_field(f, 'vcodec', default='unknown').replace(
3533 'none', 'images' if f.get('acodec') == 'none'
3534 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3535 format_field(f, 'vbr', '\t%dk'),
3536 format_field(f, 'acodec', default='unknown').replace(
3537 'none', '' if f.get('vcodec') == 'none'
3538 else self._format_screen('video only', self.Styles.SUPPRESS)),
3539 format_field(f, 'abr', '\t%dk'),
3540 format_field(f, 'asr', '\t%dHz'),
3541 join_nonempty(
3542 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3543 format_field(f, 'language', '[%s]'),
3544 join_nonempty(format_field(f, 'format_note'),
3545 format_field(f, 'container', ignore=(None, f.get('ext'))),
3546 delim=', '),
3547 delim=' '),
3548 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3549 header_line = self._list_format_headers(
3550 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3551 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3552
3553 return render_table(
3554 header_line, table, hide_empty=True,
3555 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3556
3557 def render_thumbnails_table(self, info_dict):
88f23a18 3558 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3559 if not thumbnails:
8130779d 3560 return None
3561 return render_table(
ec11a9f4 3562 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3563 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3564
8130779d 3565 def render_subtitles_table(self, video_id, subtitles):
2412044c 3566 def _row(lang, formats):
49c258e1 3567 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3568 if len(set(names)) == 1:
7aee40c1 3569 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3570 return [lang, ', '.join(names), ', '.join(exts)]
3571
8130779d 3572 if not subtitles:
3573 return None
3574 return render_table(
ec11a9f4 3575 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3576 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3577 hide_empty=True)
3578
3579 def __list_table(self, video_id, name, func, *args):
3580 table = func(*args)
3581 if not table:
3582 self.to_screen(f'{video_id} has no {name}')
3583 return
3584 self.to_screen(f'[info] Available {name} for {video_id}:')
3585 self.to_stdout(table)
3586
3587 def list_formats(self, info_dict):
3588 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3589
3590 def list_thumbnails(self, info_dict):
3591 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3592
3593 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3594 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3595
dca08720
PH
3596 def urlopen(self, req):
3597 """ Start an HTTP download """
82d8a8b6 3598 if isinstance(req, compat_basestring):
67dda517 3599 req = sanitized_Request(req)
19a41fc6 3600 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3601
3602 def print_debug_header(self):
3603 if not self.params.get('verbose'):
3604 return
49a57e70 3605
3606 def get_encoding(stream):
2a938746 3607 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3608 if not supports_terminal_sequences(stream):
e3c7d495 3609 from .compat import WINDOWS_VT_MODE
3610 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3611 return ret
3612
3613 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3614 locale.getpreferredencoding(),
3615 sys.getfilesystemencoding(),
cf4f42cb 3616 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
49a57e70 3617 self.get_encoding())
883d4b1e 3618
3619 logger = self.params.get('logger')
3620 if logger:
3621 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3622 write_debug(encoding_str)
3623 else:
96565c7e 3624 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3625 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3626
4c88ff87 3627 source = detect_variant()
36eaf303 3628 write_debug(join_nonempty(
3629 'yt-dlp version', __version__,
3630 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3631 '' if source == 'unknown' else f'({source})',
3632 delim=' '))
6e21fdd2 3633 if not _LAZY_LOADER:
3634 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3635 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3636 else:
49a57e70 3637 write_debug('Lazy loading extractors is disabled')
3ae5e797 3638 if plugin_extractors or plugin_postprocessors:
49a57e70 3639 write_debug('Plugins: %s' % [
3ae5e797 3640 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3641 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3642 if self.params.get('compat_opts'):
49a57e70 3643 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3644
3645 if source == 'source':
dca08720 3646 try:
36eaf303 3647 sp = Popen(
3648 ['git', 'rev-parse', '--short', 'HEAD'],
3649 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3650 cwd=os.path.dirname(os.path.abspath(__file__)))
3651 out, err = sp.communicate_or_kill()
3652 out = out.decode().strip()
3653 if re.match('[0-9a-f]+', out):
3654 write_debug('Git HEAD: %s' % out)
70a1165b 3655 except Exception:
36eaf303 3656 try:
3657 sys.exc_clear()
3658 except Exception:
3659 pass
b300cda4
S
3660
3661 def python_implementation():
3662 impl_name = platform.python_implementation()
3663 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3664 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3665 return impl_name
3666
49a57e70 3667 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3668 platform.python_version(),
3669 python_implementation(),
3670 platform.architecture()[0],
b300cda4 3671 platform_name()))
d28b5171 3672
8913ef74 3673 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3674 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3675 if ffmpeg_features:
a4211baf 3676 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
8913ef74 3677
4c83c967 3678 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3679 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3680 exe_str = ', '.join(
2831b468 3681 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3682 ) or 'none'
49a57e70 3683 write_debug('exe versions: %s' % exe_str)
dca08720 3684
2831b468 3685 from .downloader.websocket import has_websockets
3686 from .postprocessor.embedthumbnail import has_mutagen
f59f5ef8 3687 from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
2831b468 3688
c586f9e8 3689 lib_str = join_nonempty(
4390d5ec 3690 compat_brotli and compat_brotli.__name__,
d5820461 3691 has_certifi and 'certifi',
edf65256 3692 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
f59f5ef8 3693 SECRETSTORAGE_AVAILABLE and 'secretstorage',
2831b468 3694 has_mutagen and 'mutagen',
3695 SQLITE_AVAILABLE and 'sqlite',
c586f9e8 3696 has_websockets and 'websockets',
3697 delim=', ') or 'none'
49a57e70 3698 write_debug('Optional libraries: %s' % lib_str)
2831b468 3699
dca08720
PH
3700 proxy_map = {}
3701 for handler in self._opener.handlers:
3702 if hasattr(handler, 'proxies'):
3703 proxy_map.update(handler.proxies)
49a57e70 3704 write_debug(f'Proxy map: {proxy_map}')
dca08720 3705
49a57e70 3706 # Not implemented
3707 if False and self.params.get('call_home'):
58b1f00d 3708 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3709 write_debug('Public IP address: %s' % ipaddr)
58b1f00d
PH
3710 latest_version = self.urlopen(
3711 'https://yt-dl.org/latest/version').read().decode('utf-8')
3712 if version_tuple(latest_version) > version_tuple(__version__):
3713 self.report_warning(
3714 'You are using an outdated version (newest version: %s)! '
3715 'See https://yt-dl.org/update if you need help updating.' %
3716 latest_version)
3717
e344693b 3718 def _setup_opener(self):
6ad14cab 3719 timeout_val = self.params.get('socket_timeout')
17bddf3e 3720 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3721
982ee69a 3722 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3723 opts_cookiefile = self.params.get('cookiefile')
3724 opts_proxy = self.params.get('proxy')
3725
982ee69a 3726 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3727
6a3f4c3f 3728 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3729 if opts_proxy is not None:
3730 if opts_proxy == '':
3731 proxies = {}
3732 else:
3733 proxies = {'http': opts_proxy, 'https': opts_proxy}
3734 else:
3735 proxies = compat_urllib_request.getproxies()
067aa17e 3736 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3737 if 'http' in proxies and 'https' not in proxies:
3738 proxies['https'] = proxies['http']
91410c9b 3739 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3740
3741 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3742 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3743 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3744 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3745 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3746
3747 # When passing our own FileHandler instance, build_opener won't add the
3748 # default FileHandler, which allows us to disable the file protocol; it
3749 # can be used for malicious purposes (see
067aa17e 3750 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3751 file_handler = compat_urllib_request.FileHandler()
3752
3753 def file_open(*args, **kwargs):
7a5c1cfe 3754 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3755 file_handler.file_open = file_open
3756
3757 opener = compat_urllib_request.build_opener(
fca6dba8 3758 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3759
dca08720
PH
3760 # Delete the default user-agent header, which would otherwise apply in
3761 # cases where our custom HTTP handler doesn't come into play
067aa17e 3762 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3763 opener.addheaders = []
3764 self._opener = opener
62fec3b2
PH
3765
3766 def encode(self, s):
3767 if isinstance(s, bytes):
3768 return s # Already encoded
3769
3770 try:
3771 return s.encode(self.get_encoding())
3772 except UnicodeEncodeError as err:
3773 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3774 raise
3775
3776 def get_encoding(self):
3777 encoding = self.params.get('encoding')
3778 if encoding is None:
3779 encoding = preferredencoding()
3780 return encoding
ec82d85a 3781
e08a85d8 3782 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
80c03fa9 3783 ''' Write infojson and returns True = written, False = skip, None = error '''
e08a85d8 3784 if overwrite is None:
3785 overwrite = self.params.get('overwrites', True)
80c03fa9 3786 if not self.params.get('writeinfojson'):
3787 return False
3788 elif not infofn:
3789 self.write_debug(f'Skipping writing {label} infojson')
3790 return False
3791 elif not self._ensure_dir_exists(infofn):
3792 return None
e08a85d8 3793 elif not overwrite and os.path.exists(infofn):
80c03fa9 3794 self.to_screen(f'[info] {label.title()} metadata is already present')
3795 else:
3796 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3797 try:
3798 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3799 except (OSError, IOError):
3800 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3801 return None
3802 return True
3803
3804 def _write_description(self, label, ie_result, descfn):
3805 ''' Write description and returns True = written, False = skip, None = error '''
3806 if not self.params.get('writedescription'):
3807 return False
3808 elif not descfn:
3809 self.write_debug(f'Skipping writing {label} description')
3810 return False
3811 elif not self._ensure_dir_exists(descfn):
3812 return None
3813 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3814 self.to_screen(f'[info] {label.title()} description is already present')
3815 elif ie_result.get('description') is None:
3816 self.report_warning(f'There\'s no {label} description to write')
3817 return False
3818 else:
3819 try:
3820 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3821 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3822 descfile.write(ie_result['description'])
3823 except (OSError, IOError):
3824 self.report_error(f'Cannot write {label} description file {descfn}')
3825 return None
3826 return True
3827
3828 def _write_subtitles(self, info_dict, filename):
3829 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3830 ret = []
3831 subtitles = info_dict.get('requested_subtitles')
3832 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3833 # subtitle download errors are already reported as warnings by the relevant IE,
3834 # so this silently carries on when used with an IE that doesn't support them
3835 return ret
3836
3837 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3838 if not sub_filename_base:
3839 self.to_screen('[info] Skipping writing video subtitles')
3840 return ret
3841 for sub_lang, sub_info in subtitles.items():
3842 sub_format = sub_info['ext']
3843 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3844 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3845 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3846 if existing_sub:
80c03fa9 3847 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3848 sub_info['filepath'] = existing_sub
3849 ret.append((existing_sub, sub_filename_final))
80c03fa9 3850 continue
3851
3852 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3853 if sub_info.get('data') is not None:
3854 try:
3855 # Use newline='' to prevent conversion of newline characters
3856 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3857 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3858 subfile.write(sub_info['data'])
3859 sub_info['filepath'] = sub_filename
3860 ret.append((sub_filename, sub_filename_final))
3861 continue
3862 except (OSError, IOError):
3863 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3864 return None
3865
3866 try:
3867 sub_copy = sub_info.copy()
3868 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3869 self.dl(sub_filename, sub_copy, subtitle=True)
3870 sub_info['filepath'] = sub_filename
3871 ret.append((sub_filename, sub_filename_final))
6020e05d 3872 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3873 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3874 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3875 if not self.params.get('ignoreerrors'):
3876 self.report_error(msg)
3877 raise DownloadError(msg)
3878 self.report_warning(msg)
519804a9 3879 return ret
80c03fa9 3880
3881 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3882 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3883 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3884 thumbnails, ret = [], []
6c4fd172 3885 if write_all or self.params.get('writethumbnail', False):
0202b52a 3886 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3887 multiple = write_all and len(thumbnails) > 1
ec82d85a 3888
80c03fa9 3889 if thumb_filename_base is None:
3890 thumb_filename_base = filename
3891 if thumbnails and not thumb_filename_base:
3892 self.write_debug(f'Skipping writing {label} thumbnail')
3893 return ret
3894
dd0228ce 3895 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3896 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3897 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3898 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3899 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3900
e04938ab 3901 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3902 if existing_thumb:
aa9369a2 3903 self.to_screen('[info] %s is already present' % (
3904 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3905 t['filepath'] = existing_thumb
3906 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3907 else:
80c03fa9 3908 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3909 try:
297e9952 3910 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3911 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3912 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3913 shutil.copyfileobj(uf, thumbf)
80c03fa9 3914 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3915 t['filepath'] = thumb_filename
3158150c 3916 except network_exceptions as err:
dd0228ce 3917 thumbnails.pop(idx)
80c03fa9 3918 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3919 if ret and not write_all:
3920 break
0202b52a 3921 return ret