yt-dlp.git: blame of yt_dlp/YoutubeDL.py (at commit "[cookies] Report progress when importing cookies")
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
b5ae35ee 11import functools
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
524e2e4f 28import unicodedata
8222d8de 29
ec11a9f4 30from enum import Enum
961ea474
S
31from string import ascii_letters
32
8c25f81b 33from .compat import (
82d8a8b6 34 compat_basestring,
4390d5ec 35 compat_brotli,
003c69a8 36 compat_get_terminal_size,
4f026faf 37 compat_kwargs,
d0d9ade4 38 compat_numeric_types,
e9c0cdd3 39 compat_os_name,
edf65256 40 compat_pycrypto_AES,
7d1eb38a 41 compat_shlex_quote,
ce02ed60 42 compat_str,
67134eab 43 compat_tokenize_tokenize,
ce02ed60
PH
44 compat_urllib_error,
45 compat_urllib_request,
8b172c2e 46 compat_urllib_request_DataHandler,
819e0531 47 windows_enable_vt_mode,
8c25f81b 48)
982ee69a 49from .cookies import load_cookies
8c25f81b 50from .utils import (
eedb7ba5
S
51 age_restricted,
52 args_to_str,
ce02ed60
PH
53 ContentTooShortError,
54 date_from_str,
55 DateRange,
acd69589 56 DEFAULT_OUTTMPL,
ce02ed60 57 determine_ext,
b5559424 58 determine_protocol,
48f79687 59 DownloadCancelled,
ce02ed60 60 DownloadError,
c0384f22 61 encode_compat_str,
ce02ed60 62 encodeFilename,
498f5606 63 EntryNotInPlaylist,
a06916d9 64 error_to_compat_str,
8b0d7497 65 ExistingVideoReached,
590bc6f6 66 expand_path,
ce02ed60 67 ExtractorError,
90137ca4 68 filter_dict,
e29663c6 69 float_or_none,
02dbf93f 70 format_bytes,
76d321f6 71 format_field,
e0fd9573 72 format_decimal_suffix,
525ef922 73 formatSeconds,
773f291d 74 GeoRestrictedError,
0bb322b9 75 get_domain,
d5820461 76 has_certifi,
b0249bca 77 HEADRequest,
d37707bd 78 InAdvancePagedList,
c9969434 79 int_or_none,
732044af 80 iri_to_uri,
773f291d 81 ISO3166Utils,
34921b43 82 join_nonempty,
56a8fb4f 83 LazyList,
08438d2c 84 LINK_TEMPLATES,
ce02ed60 85 locked_file,
0202b52a 86 make_dir,
dca08720 87 make_HTTPS_handler,
ce02ed60 88 MaxDownloadsReached,
8b7539d2 89 merge_headers,
3158150c 90 network_exceptions,
5c3895ff 91 NO_DEFAULT,
ec11a9f4 92 number_of_digits,
cd6fc19e 93 orderedSet,
a06916d9 94 OUTTMPL_TYPES,
b7ab0590 95 PagedList,
083c9df9 96 parse_filesize,
91410c9b 97 PerRequestProxyHandler,
dca08720 98 platform_name,
d3c93ec2 99 Popen,
1e43a6f7 100 POSTPROCESS_WHEN,
eedb7ba5 101 PostProcessingError,
ce02ed60 102 preferredencoding,
eedb7ba5 103 prepend_extension,
f2ebc5c7 104 ReExtractInfo,
51fb4995 105 register_socks_protocols,
a06916d9 106 RejectedVideoReached,
3efb96a6 107 remove_terminal_sequences,
cfb56d1a 108 render_table,
eedb7ba5 109 replace_extension,
ce02ed60
PH
110 SameFileError,
111 sanitize_filename,
1bb5c511 112 sanitize_path,
dcf77cf1 113 sanitize_url,
67dda517 114 sanitized_Request,
e5660ee6 115 std_headers,
819e0531 116 STR_FORMAT_RE_TMPL,
117 STR_FORMAT_TYPES,
1211bb6d 118 str_or_none,
e29663c6 119 strftime_or_none,
ce02ed60 120 subtitles_filename,
819e0531 121 supports_terminal_sequences,
f2ebc5c7 122 timetuple_from_msec,
732044af 123 to_high_limit_path,
324ad820 124 traverse_obj,
6033d980 125 try_get,
ce02ed60 126 UnavailableVideoError,
29eb5174 127 url_basename,
7d1eb38a 128 variadic,
58b1f00d 129 version_tuple,
ce02ed60
PH
130 write_json_file,
131 write_string,
6a3f4c3f 132 YoutubeDLCookieProcessor,
dca08720 133 YoutubeDLHandler,
fca6dba8 134 YoutubeDLRedirectHandler,
ce02ed60 135)
a0e07d31 136from .cache import Cache
ec11a9f4 137from .minicurses import format_text
52a8a1e1 138from .extractor import (
139 gen_extractor_classes,
140 get_info_extractor,
141 _LAZY_LOADER,
3ae5e797 142 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 143)
4c54b89e 144from .extractor.openload import PhantomJSwrapper
52a8a1e1 145from .downloader import (
dbf5416a 146 FFmpegFD,
52a8a1e1 147 get_suitable_downloader,
148 shorten_protocol_name
149)
4c83c967 150from .downloader.rtmp import rtmpdump_version
4f026faf 151from .postprocessor import (
e36d50c5 152 get_postprocessor,
4e3b637d 153 EmbedThumbnailPP,
adbc4ec4 154 FFmpegFixupDuplicateMoovPP,
e36d50c5 155 FFmpegFixupDurationPP,
f17f8651 156 FFmpegFixupM3u8PP,
62cd676c 157 FFmpegFixupM4aPP,
6271f1ca 158 FFmpegFixupStretchedPP,
e36d50c5 159 FFmpegFixupTimestampPP,
4f026faf
PH
160 FFmpegMergerPP,
161 FFmpegPostProcessor,
0202b52a 162 MoveFilesAfterDownloadPP,
3ae5e797 163 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 164)
4c88ff87 165from .update import detect_variant
36eaf303 166from .version import __version__, RELEASE_GIT_HEAD
8222d8de 167
e9c0cdd3
YCH
168if compat_os_name == 'nt':
169 import ctypes
170
2459b6e1 171
8222d8de
JMF
172class YoutubeDL(object):
173 """YoutubeDL class.
174
175 YoutubeDL objects are the ones responsible for downloading the
176 actual video file and writing it to disk if the user has requested
177 it, among some other tasks. In most cases there should be one per
178 program. Since, given a video URL, the downloader doesn't know how to
179 extract all the needed information (a task that InfoExtractors do), it
180 has to pass the URL to one of them.
181
182 For this, YoutubeDL objects have a method that allows
183 InfoExtractors to be registered in a given order. When it is passed
184 a URL, the YoutubeDL object hands it to the first InfoExtractor it
185 finds that reports being able to handle it. The InfoExtractor extracts
186 all the information about the video or videos the URL refers to, and
187 YoutubeDL processes the extracted information, possibly using a File
188 Downloader to download the video.
189
190 YoutubeDL objects accept a lot of parameters. In order not to saturate
191 the object constructor with arguments, it receives a dictionary of
192 options instead. These options are available through the params
193 attribute for the InfoExtractors to use. The YoutubeDL also
194 registers itself as the downloader in charge of the InfoExtractors
195 that are added to it, so this is a "mutual registration".
196
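    A minimal usage sketch (the URL is just an illustrative placeholder):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'best'}) as ydl:
            # the default InfoExtractors are registered automatically
            # (auto_init=True), so download() can hand the URL to a
            # suitable extractor and then fetch the selected format
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
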
197 Available options:
198
199 username: Username for authentication purposes.
200 password: Password for authentication purposes.
180940e0 201 videopassword: Password for accessing a video.
1da50aa3
S
202 ap_mso: Adobe Pass multiple-system operator identifier.
203 ap_username: Multiple-system operator account username.
204 ap_password: Multiple-system operator account password.
8222d8de
JMF
205 usenetrc: Use netrc for authentication instead.
206 verbose: Print additional info to stdout.
207 quiet: Do not print messages to stdout.
ad8915b7 208 no_warnings: Do not print out anything for warnings.
bb66c247 209 forceprint: A dict with keys WHEN mapped to a list of templates to
210 print to stdout. The allowed keys are video or any of the
211 items in utils.POSTPROCESS_WHEN.
ca30f449 212 For compatibility, a single list is also accepted
bb66c247 213 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
214 a list of tuples with (template, filename)
53c18592 215 forceurl: Force printing final URL. (Deprecated)
216 forcetitle: Force printing title. (Deprecated)
217 forceid: Force printing ID. (Deprecated)
218 forcethumbnail: Force printing thumbnail URL. (Deprecated)
219 forcedescription: Force printing description. (Deprecated)
220 forcefilename: Force printing final filename. (Deprecated)
221 forceduration: Force printing duration. (Deprecated)
8694c600 222 forcejson: Force printing info_dict as JSON.
63e0be34
PH
223 dump_single_json: Force printing the info_dict of the whole playlist
224 (or video) as a single JSON line.
c25228e5 225 force_write_download_archive: Force writing download archive regardless
226 of 'skip_download' or 'simulate'.
b7b04c78 227 simulate: Do not download the video files. If unset (or None),
228 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 229 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 230 You can also pass a function. The function takes 'ctx' as
231 argument and returns the formats to download.
232 See "build_format_selector" for an implementation; a usage sketch appears near the end of this docstring
63ad4d43 233 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 234 ignore_no_formats_error: Ignore "No video formats" error. Useful for
235 extracting metadata even if the video is not actually
236 available for download (experimental)
0930b11f 237 format_sort: A list of fields by which to sort the video formats.
238 See "Sorting Formats" for more details.
c25228e5 239 format_sort_force: Force the given format_sort. see "Sorting Formats"
240 for more details.
08d30158 241 prefer_free_formats: Whether to prefer video formats with free containers
242 over non-free ones of same quality.
c25228e5 243 allow_multiple_video_streams: Allow multiple video streams to be merged
244 into a single file
245 allow_multiple_audio_streams: Allow multiple audio streams to be merged
246 into a single file
0ba692ac 247 check_formats Whether to test if the formats are downloadable.
9f1a1c36 248 Can be True (check all), False (check none),
249 'selected' (check selected formats),
0ba692ac 250 or None (check only if requested by extractor)
4524baf0 251 paths: Dictionary of output paths. The allowed keys are 'home',
252 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 253 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 254 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 255 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
256 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
257 restrictfilenames: Do not allow "&" and spaces in file names
258 trim_file_name: Limit length of filename (extension excluded)
4524baf0 259 windowsfilenames: Force the filenames to be windows compatible
b1940459 260 ignoreerrors: Do not stop on download/postprocessing errors.
261 Can be 'only_download' to ignore only download errors.
262 Default is 'only_download' for CLI, but False for API
26e2805c 263 skip_playlist_after_errors: Number of allowed failures until the rest of
264 the playlist is skipped
d22dec74 265 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 266 overwrites: Overwrite all video and metadata files if True,
267 overwrite only non-video files if None
268 and don't overwrite any file if False
34488702 269 For compatibility with youtube-dl,
270 "nooverwrites" may also be used instead
8222d8de
JMF
271 playliststart: Playlist item to start at.
272 playlistend: Playlist item to end at.
c14e88f0 273 playlist_items: Specific indices of playlist to download.
ff815fe6 274 playlistreverse: Download playlist items in reverse order.
75822ca7 275 playlistrandom: Download playlist items in random order.
8222d8de
JMF
276 matchtitle: Download only matching titles.
277 rejecttitle: Reject downloads for matching titles.
8bf9319e 278 logger: Log messages to a logging.Logger instance.
8222d8de 279 logtostderr: Log messages to stderr instead of stdout.
819e0531 280 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
281 writedescription: Write the video description to a .description file
282 writeinfojson: Write the video description to a .info.json file
75d43ca0 283 clean_infojson: Remove private fields from the infojson
34488702 284 getcomments: Extract video comments. This will not be written to disk
06167fbb 285 unless writeinfojson is also given
1fb07d10 286 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 287 writethumbnail: Write the thumbnail image to a file
c25228e5 288 allow_playlist_files: Whether to write playlists' description, infojson etc
289 also to disk when using the 'write*' options
ec82d85a 290 write_all_thumbnails: Write all thumbnail formats to files
732044af 291 writelink: Write an internet shortcut file, depending on the
292 current platform (.url/.webloc/.desktop)
293 writeurllink: Write a Windows internet shortcut file (.url)
294 writewebloclink: Write a macOS internet shortcut file (.webloc)
295 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 296 writesubtitles: Write the video subtitles to a file
741dd8ea 297 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 298 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 299 Downloads all the subtitles of the video
0b7f3118 300 (requires writesubtitles or writeautomaticsub)
8222d8de 301 listsubtitles: Lists all available subtitles for the video
a504ced0 302 subtitlesformat: The format code for subtitles
c32b0aab 303 subtitleslangs: List of languages of the subtitles to download (can be regex).
304 The list may contain "all" to refer to all the available
305 subtitles. The language can be prefixed with a "-" to
306 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
307 keepvideo: Keep the video file after post-processing
308 daterange: A DateRange object, download only if the upload_date is in the range.
309 skip_download: Skip the actual download of the video file
c35f9e72 310 cachedir: Location of the cache files in the filesystem.
a0e07d31 311 False to disable filesystem cache.
47192f92 312 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
313 age_limit: An integer representing the user's age in years.
314 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
315 min_views: An integer representing the minimum view count the video
316 must have in order to not be skipped.
317 Videos without view count information are always
318 downloaded. None for no limit.
319 max_views: An integer representing the maximum view count.
320 Videos that are more popular than that are not
321 downloaded.
322 Videos without view count information are always
323 downloaded. None for no limit.
324 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
325 Videos already present in the file are not downloaded
326 again.
8a51f564 327 break_on_existing: Stop the download process after attempting to download a
328 file that is in the archive.
329 break_on_reject: Stop the download process when encountering a video that
330 has been filtered out.
b222c271 331 break_per_url: Whether break_on_reject and break_on_existing
332 should act on each input URL as opposed to for the entire queue
8a51f564 333 cookiefile: File name where cookies should be read from and dumped to
f59f5ef8
MB
334 cookiesfrombrowser: A tuple containing the name of the browser, the profile
335 name/path from which cookies are loaded, and the name of the
336 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
f81c62a6 337 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
338 support RFC 5746 secure renegotiation
f59f5ef8 339 nocheckcertificate: Do not verify SSL certificates
7e8c0af0
PH
340 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
341 At the moment, this is only supported by YouTube.
8b7539d2 342 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 343 proxy: URL of the proxy server to use
38cce791 344 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 345 on geo-restricted sites.
e344693b 346 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
347 bidi_workaround: Work around buggy terminals without bidirectional text
348 support, using fribidi
a0ddb8a2 349 debug_printtraffic:Print out sent and received HTTP traffic
91f071af 350 include_ads: Download ads as well (deprecated)
04b4d394
PH
351 default_search: Prepend this string if an input url is not valid.
352 'auto' for elaborate guessing
62fec3b2 353 encoding: Use this encoding instead of the system-specified.
e8ee972c 354 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
355 Pass in 'in_playlist' to only show this behavior for
356 playlist items.
f2ebc5c7 357 wait_for_video: If given, wait for scheduled streams to become available.
358 The value should be a tuple containing the range
359 (min_secs, max_secs) to wait between retries
4f026faf 360 postprocessors: A list of dictionaries, each with an entry
71b640cc 361 * key: The name of the postprocessor. See
7a5c1cfe 362 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 363 * when: When to run the postprocessor. Allowed values are
364 the entries of utils.POSTPROCESS_WHEN
56d868db 365 Assumed to be 'post_process' if not given
b5ae35ee 366 post_hooks: Deprecated - Register a custom postprocessor instead
367 A list of functions that get called as the final step
ab8e5e51
AM
368 for each video file, after all postprocessors have been
369 called. The filename will be passed as the only argument.
71b640cc
PH
370 progress_hooks: A list of functions that get called on download
371 progress, with a dictionary with the entries
5cda4eda 372 * status: One of "downloading", "error", or "finished".
ee69b99a 373 Check this first and ignore unknown values.
3ba7740d 374 * info_dict: The extracted info_dict
71b640cc 375
5cda4eda 376 If status is one of "downloading", or "finished", the
ee69b99a
PH
377 following properties may also be present:
378 * filename: The final filename (always present)
5cda4eda 379 * tmpfilename: The filename we're currently writing to
71b640cc
PH
380 * downloaded_bytes: Bytes on disk
381 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
382 * total_bytes_estimate: Guess of the eventual file size,
383 None if unavailable.
384 * elapsed: The number of seconds since download started.
71b640cc
PH
385 * eta: The estimated time in seconds, None if unknown
386 * speed: The download speed in bytes/second, None if
387 unknown
5cda4eda
PH
388 * fragment_index: The counter of the currently
389 downloaded video fragment.
390 * fragment_count: The number of fragments (= individual
391 files that will be merged)
71b640cc
PH
392
393 Progress hooks are guaranteed to be called at least once
394 (with status "finished") if the download is successful.
819e0531 395 postprocessor_hooks: A list of functions that get called on postprocessing
396 progress, with a dictionary with the entries
397 * status: One of "started", "processing", or "finished".
398 Check this first and ignore unknown values.
399 * postprocessor: Name of the postprocessor
400 * info_dict: The extracted info_dict
401
402 Progress hooks are guaranteed to be called at least twice
403 (with status "started" and "finished") if the processing is successful.
45598f15 404 merge_output_format: Extension to use when merging formats.
6b591b29 405 final_ext: Expected final extension; used to detect when the file was
59a7a13e 406 already downloaded and converted
6271f1ca
PH
407 fixup: Automatically correct known faults of the file.
408 One of:
409 - "never": do nothing
410 - "warn": only emit a warning
411 - "detect_or_warn": check whether we can do anything
62cd676c 412 about it, warn otherwise (default)
504f20dd 413 source_address: Client-side IP address to bind to.
6ec6cb4e 414 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 415 yt-dlp servers for debugging. (BROKEN)
1cf376f5 416 sleep_interval_requests: Number of seconds to sleep between requests
417 during extraction
7aa589a5
S
418 sleep_interval: Number of seconds to sleep before each download when
419 used alone or a lower bound of a range for randomized
420 sleep before each download (minimum possible number
421 of seconds to sleep) when used along with
422 max_sleep_interval.
423 max_sleep_interval:Upper bound of a range for randomized sleep before each
424 download (maximum possible number of seconds to sleep).
425 Must only be used along with sleep_interval.
426 Actual sleep time will be a random float from range
427 [sleep_interval; max_sleep_interval].
1cf376f5 428 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
429 listformats: Print an overview of available video formats and exit.
430 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
431 match_filter: A function that gets called with the info_dict of
432 every video.
433 If it returns a message, the video is ignored.
434 If it returns None, the video is downloaded.
435 match_filter_func in utils.py is one example for this.
7e5db8c9 436 no_color: Do not emit color codes in output.
0a840f58 437 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 438 HTTP header
0a840f58 439 geo_bypass_country:
773f291d
S
440 Two-letter ISO 3166-2 country code that will be used for
441 explicit geographic restriction bypassing via faking
504f20dd 442 X-Forwarded-For HTTP header
5f95927a
S
443 geo_bypass_ip_block:
444 IP range in CIDR notation that will be used similarly to
504f20dd 445 geo_bypass_country
71b640cc 446
85729c51 447 The following options determine which downloader is picked:
52a8a1e1 448 external_downloader: A dictionary of protocol keys and the executable of the
449 external downloader to use for it. The allowed protocols
450 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
451 Set the value to 'native' to use the native downloader
452 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
453 or {'m3u8': 'ffmpeg'} instead.
454 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
455 if True, otherwise use ffmpeg/avconv if False, otherwise
456 use downloader suggested by extractor if None.
53ed7066 457 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 458 The following options do not work when used through the API:
b5ae35ee 459 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 460 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 461 Refer to __init__.py for their implementation
819e0531 462 progress_template: Dictionary of templates for progress outputs.
463 Allowed keys are 'download', 'postprocess',
464 'download-title' (console title) and 'postprocess-title'.
465 The template is mapped on a dictionary with keys 'progress' and 'info'
fe7e0c98 466
8222d8de 467 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 468 the downloader (see yt_dlp/downloader/common.py):
51d9739f 469 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654
EH
470 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
471 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 472 external_downloader_args, concurrent_fragment_downloads.
76b1bd67
JMF
473
474 The following options are used by the post processors:
d4a24f40 475 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 476 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
477 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
478 to the binary or its containing directory.
43820c03 479 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 480 and a list of additional command-line arguments for the
481 postprocessor/executable. The dict can also have "PP+EXE" keys
482 which are used when the given exe is used by the given PP.
483 Use 'default' as the name for arguments to passed to all PP
484 For compatibility with youtube-dl, a single list of args
485 can also be used
e409895f 486
487 The following options are used by the extractors:
62bff2c1 488 extractor_retries: Number of times to retry for known errors
489 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 490 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 491 discontinuities such as ad breaks (default: False)
5d3a0e79 492 extractor_args: A dictionary of arguments to be passed to the extractors.
493 See "EXTRACTOR ARGUMENTS" for details.
494 Eg: {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 495 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
5d3a0e79 496 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
497 If True (default), DASH manifests and related
62bff2c1 498 data will be downloaded and processed by extractor.
499 You can reduce network I/O by disabling it if you don't
500 care about DASH. (only for youtube)
5d3a0e79 501 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
502 If True (default), HLS manifests and related
62bff2c1 503 data will be downloaded and processed by extractor.
504 You can reduce network I/O by disabling it if you don't
505 care about HLS. (only for youtube)
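    A sketch pulling together several of the options above (the function and
    hook names are hypothetical and the URL is a placeholder):

        import yt_dlp

        def pick_best_mp4(ctx):
            # 'format' may be a callable: it receives a ctx dict containing the
            # extracted 'formats' and returns the formats to download
            formats = ctx['formats']
            mp4s = [f for f in formats if f.get('ext') == 'mp4']
            return mp4s[-1:] or formats[-1:]

        def on_progress(d):
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])

        ydl_opts = {
            'format': pick_best_mp4,
            'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
            'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
            'progress_hooks': [on_progress],
            'postprocessors': [{'key': 'FFmpegMetadata', 'when': 'post_process'}],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://example.com/watch?v=placeholder'])
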
8222d8de
JMF
506 """
507
c9969434
S
508 _NUMERIC_FIELDS = set((
509 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 510 'timestamp', 'release_timestamp',
c9969434
S
511 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
512 'average_rating', 'comment_count', 'age_limit',
513 'start_time', 'end_time',
514 'chapter_number', 'season_number', 'episode_number',
515 'track_number', 'disc_number', 'release_year',
c9969434
S
516 ))
517
6db9c4d5 518 _format_fields = {
519 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 520 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
6db9c4d5 521 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
522 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
523 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
524 'preference', 'language', 'language_preference', 'quality', 'source_preference',
525 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
526 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
527 }
48ee10ee 528 _format_selection_exts = {
529 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
530 'video': {'mp4', 'flv', 'webm', '3gp'},
531 'storyboards': {'mhtml'},
532 }
533
3511266b 534 def __init__(self, params=None, auto_init=True):
883d4b1e 535 """Create a FileDownloader object with the given options.
536 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 537 Set to 'no_verbose_header' to not print the header
883d4b1e 538 """
e9f9a10f
JMF
539 if params is None:
540 params = {}
592b7485 541 self.params = params
8b7491c8 542 self._ies = {}
56c73665 543 self._ies_instances = {}
1e43a6f7 544 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 545 self._printed_messages = set()
1cf376f5 546 self._first_webpage_request = True
ab8e5e51 547 self._post_hooks = []
933605d7 548 self._progress_hooks = []
819e0531 549 self._postprocessor_hooks = []
8222d8de
JMF
550 self._download_retcode = 0
551 self._num_downloads = 0
9c906919 552 self._num_videos = 0
592b7485 553 self._playlist_level = 0
554 self._playlist_urls = set()
a0e07d31 555 self.cache = Cache(self)
34308b30 556
819e0531 557 windows_enable_vt_mode()
cf4f42cb 558 self._out_files = {
559 'error': sys.stderr,
560 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
561 'console': None if compat_os_name == 'nt' else next(
562 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
563 }
564 self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
ec11a9f4 565 self._allow_colors = {
cf4f42cb 566 type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
567 for type_ in ('screen', 'error')
ec11a9f4 568 }
819e0531 569
a61f4b28 570 if sys.version_info < (3, 6):
571 self.report_warning(
0181adef 572 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 573
88acdbc2 574 if self.params.get('allow_unplayable_formats'):
575 self.report_warning(
ec11a9f4 576 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 577 'This is a developer option intended for debugging. \n'
578 ' If you experience any issues while using this option, '
ec11a9f4 579 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 580
be5df5ee
S
581 def check_deprecated(param, option, suggestion):
582 if self.params.get(param) is not None:
53ed7066 583 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee
S
584 return True
585 return False
586
587 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
588 if self.params.get('geo_verification_proxy') is None:
589 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
590
0d1bb027 591 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
592 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 593 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 594
49a57e70 595 for msg in self.params.get('_warnings', []):
0d1bb027 596 self.report_warning(msg)
ee8dd27a 597 for msg in self.params.get('_deprecation_warnings', []):
598 self.deprecation_warning(msg)
0d1bb027 599
ec11a9f4 600 if 'list-formats' in self.params.get('compat_opts', []):
601 self.params['listformats_table'] = False
602
b5ae35ee 603 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 604 # nooverwrites was unnecessarily changed to overwrites
605 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
606 # This ensures compatibility with both keys
607 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 608 elif self.params.get('overwrites') is None:
609 self.params.pop('overwrites', None)
b868936c 610 else:
611 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 612
455a15e2 613 self.params.setdefault('forceprint', {})
614 self.params.setdefault('print_to_file', {})
bb66c247 615
616 # Compatibility with older syntax
ca30f449 617 if not isinstance(params['forceprint'], dict):
455a15e2 618 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 619
455a15e2 620 if self.params.get('bidi_workaround', False):
1c088fa8
PH
621 try:
622 import pty
623 master, slave = pty.openpty()
003c69a8 624 width = compat_get_terminal_size().columns
1c088fa8
PH
625 if width is None:
626 width_args = []
627 else:
628 width_args = ['-w', str(width)]
5d681e96 629 sp_kwargs = dict(
1c088fa8
PH
630 stdin=subprocess.PIPE,
631 stdout=slave,
cf4f42cb 632 stderr=self._out_files['error'])
5d681e96 633 try:
d3c93ec2 634 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
5d681e96 635 except OSError:
d3c93ec2 636 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
5d681e96 637 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 638 except OSError as ose:
66e7ace1 639 if ose.errno == errno.ENOENT:
49a57e70 640 self.report_warning(
641 'Could not find fribidi executable, ignoring --bidi-workaround. '
642 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
643 else:
644 raise
0783b09b 645
97ec5bc5 646 if auto_init:
647 if auto_init != 'no_verbose_header':
648 self.print_debug_header()
649 self.add_default_info_extractors()
650
3089bc74
S
651 if (sys.platform != 'win32'
652 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 653 and not self.params.get('restrictfilenames', False)):
e9137224 654 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 655 self.report_warning(
6febd1c1 656 'Assuming --restrict-filenames since file system encoding '
1b725173 657 'cannot encode all characters. '
6febd1c1 658 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 659 self.params['restrictfilenames'] = True
34308b30 660
de6000d9 661 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 662
187986a8 663 # Creating format selector here allows us to catch syntax errors before the extraction
664 self.format_selector = (
fa9f30b8 665 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 666 else self.params['format'] if callable(self.params['format'])
187986a8 667 else self.build_format_selector(self.params['format']))
668
8b7539d2 669 # Set http_headers defaults according to std_headers
670 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
671
013b50b7 672 hooks = {
673 'post_hooks': self.add_post_hook,
674 'progress_hooks': self.add_progress_hook,
675 'postprocessor_hooks': self.add_postprocessor_hook,
676 }
677 for opt, fn in hooks.items():
678 for ph in self.params.get(opt, []):
679 fn(ph)
71b640cc 680
5bfc8bee 681 for pp_def_raw in self.params.get('postprocessors', []):
682 pp_def = dict(pp_def_raw)
683 when = pp_def.pop('when', 'post_process')
684 self.add_post_processor(
685 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
686 when=when)
687
97ec5bc5 688 self._setup_opener()
51fb4995
YCH
689 register_socks_protocols()
690
ed39cac5 691 def preload_download_archive(fn):
692 """Preload the archive, if any is specified"""
693 if fn is None:
694 return False
49a57e70 695 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 696 try:
697 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
698 for line in archive_file:
699 self.archive.add(line.strip())
700 except IOError as ioe:
701 if ioe.errno != errno.ENOENT:
702 raise
703 return False
704 return True
705
706 self.archive = set()
707 preload_download_archive(self.params.get('download_archive'))
708
7d4111ed
PH
709 def warn_if_short_id(self, argv):
710 # short YouTube ID starting with dash?
711 idxs = [
712 i for i, a in enumerate(argv)
713 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
714 if idxs:
715 correct_argv = (
7a5c1cfe 716 ['yt-dlp']
3089bc74
S
717 + [a for i, a in enumerate(argv) if i not in idxs]
718 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
719 )
720 self.report_warning(
721 'Long argument string detected. '
49a57e70 722 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed
PH
723 args_to_str(correct_argv))
724
8222d8de
JMF
725 def add_info_extractor(self, ie):
726 """Add an InfoExtractor object to the end of the list."""
8b7491c8 727 ie_key = ie.ie_key()
728 self._ies[ie_key] = ie
e52d7f85 729 if not isinstance(ie, type):
8b7491c8 730 self._ies_instances[ie_key] = ie
e52d7f85 731 ie.set_downloader(self)
8222d8de 732
8b7491c8 733 def _get_info_extractor_class(self, ie_key):
734 ie = self._ies.get(ie_key)
735 if ie is None:
736 ie = get_info_extractor(ie_key)
737 self.add_info_extractor(ie)
738 return ie
739
56c73665
JMF
740 def get_info_extractor(self, ie_key):
741 """
742 Get an instance of an IE with name ie_key. It will try to get one from
743 the _ies list; if there is no instance, it will create a new one and add
744 it to the extractor list.
745 """
746 ie = self._ies_instances.get(ie_key)
747 if ie is None:
748 ie = get_info_extractor(ie_key)()
749 self.add_info_extractor(ie)
750 return ie
751
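    A small usage sketch, assuming 'Youtube' as the ie_key of the YouTube extractor:

        from yt_dlp import YoutubeDL

        ydl = YoutubeDL({})
        ie = ydl.get_info_extractor('Youtube')
        # the instance is cached, so a second call returns the same object
        assert ydl.get_info_extractor('Youtube') is ie
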
023fa8c4
JMF
752 def add_default_info_extractors(self):
753 """
754 Add the InfoExtractors returned by gen_extractors to the end of the list
755 """
e52d7f85 756 for ie in gen_extractor_classes():
023fa8c4
JMF
757 self.add_info_extractor(ie)
758
56d868db 759 def add_post_processor(self, pp, when='post_process'):
8222d8de 760 """Add a PostProcessor object to the end of the chain."""
5bfa4862 761 self._pps[when].append(pp)
8222d8de
JMF
762 pp.set_downloader(self)
763
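    A sketch of registering a postprocessor instance directly, assuming
    FFmpegMetadataPP as exported by yt_dlp.postprocessor:

        from yt_dlp import YoutubeDL
        from yt_dlp.postprocessor import FFmpegMetadataPP

        ydl = YoutubeDL({})
        # 'post_process' is the default "when" documented in the class docstring
        ydl.add_post_processor(FFmpegMetadataPP(ydl), when='post_process')
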
ab8e5e51
AM
764 def add_post_hook(self, ph):
765 """Add the post hook"""
766 self._post_hooks.append(ph)
767
933605d7 768 def add_progress_hook(self, ph):
819e0531 769 """Add the download progress hook"""
933605d7 770 self._progress_hooks.append(ph)
8ab470f1 771
819e0531 772 def add_postprocessor_hook(self, ph):
773 """Add the postprocessing progress hook"""
774 self._postprocessor_hooks.append(ph)
5bfc8bee 775 for pps in self._pps.values():
776 for pp in pps:
777 pp.add_progress_hook(ph)
819e0531 778
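    A sketch of the hook signatures described in the class docstring (the
    hook names are illustrative):

        from yt_dlp import YoutubeDL

        def on_progress(d):
            if d['status'] == 'downloading':
                print(d.get('downloaded_bytes'), 'of', d.get('total_bytes'), 'bytes')

        def on_postprocess(d):
            print(d['postprocessor'], d['status'])

        ydl = YoutubeDL({})
        ydl.add_progress_hook(on_progress)
        ydl.add_postprocessor_hook(on_postprocess)
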
1c088fa8 779 def _bidi_workaround(self, message):
5d681e96 780 if not hasattr(self, '_output_channel'):
1c088fa8
PH
781 return message
782
5d681e96 783 assert hasattr(self, '_output_process')
11b85ce6 784 assert isinstance(message, compat_str)
6febd1c1
PH
785 line_count = message.count('\n') + 1
786 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 787 self._output_process.stdin.flush()
6febd1c1 788 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 789 for _ in range(line_count))
6febd1c1 790 return res[:-len('\n')]
1c088fa8 791
b35496d8 792 def _write_string(self, message, out=None, only_once=False):
793 if only_once:
794 if message in self._printed_messages:
795 return
796 self._printed_messages.add(message)
797 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 798
cf4f42cb 799 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 800 """Print message to stdout"""
cf4f42cb 801 if quiet is not None:
ae6a1b95 802 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
cf4f42cb 803 self._write_string(
804 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
805 self._out_files['print'])
806
807 def to_screen(self, message, skip_eol=False, quiet=None):
808 """Print message to screen if not in quiet mode"""
8bf9319e 809 if self.params.get('logger'):
43afe285 810 self.params['logger'].debug(message)
cf4f42cb 811 return
812 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
813 return
814 self._write_string(
815 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
816 self._out_files['screen'])
8222d8de 817
b35496d8 818 def to_stderr(self, message, only_once=False):
0760b0a7 819 """Print message to stderr"""
11b85ce6 820 assert isinstance(message, compat_str)
8bf9319e 821 if self.params.get('logger'):
43afe285
IB
822 self.params['logger'].error(message)
823 else:
cf4f42cb 824 self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
825
826 def _send_console_code(self, code):
827 if compat_os_name == 'nt' or not self._out_files['console']:
828 return
829 self._write_string(code, self._out_files['console'])
8222d8de 830
1e5b9a95
PH
831 def to_console_title(self, message):
832 if not self.params.get('consoletitle', False):
833 return
3efb96a6 834 message = remove_terminal_sequences(message)
4bede0d8
C
835 if compat_os_name == 'nt':
836 if ctypes.windll.kernel32.GetConsoleWindow():
837 # c_wchar_p() might not be necessary if `message` is
838 # already of type unicode()
839 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 840 else:
841 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 842
bdde425c 843 def save_console_title(self):
cf4f42cb 844 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 845 return
592b7485 846 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c
PH
847
848 def restore_console_title(self):
cf4f42cb 849 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 850 return
592b7485 851 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c
PH
852
853 def __enter__(self):
854 self.save_console_title()
855 return self
856
857 def __exit__(self, *args):
858 self.restore_console_title()
f89197d7 859
dca08720 860 if self.params.get('cookiefile') is not None:
1bab3437 861 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 862
fa9f30b8 863 def trouble(self, message=None, tb=None, is_error=True):
8222d8de
JMF
864 """Determine action to take when a download problem appears.
865
866 Depending on whether the downloader has been configured to ignore
867 download errors, this method may or may not raise an exception when
868 errors are found, after printing the message.
869
fa9f30b8 870 @param tb If given, additional traceback information
871 @param is_error Whether to raise an error according to ignoreerrors
8222d8de
JMF
872 """
873 if message is not None:
874 self.to_stderr(message)
875 if self.params.get('verbose'):
876 if tb is None:
877 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 878 tb = ''
8222d8de 879 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 880 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 881 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
882 else:
883 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 884 tb = ''.join(tb_data)
c19bc311 885 if tb:
886 self.to_stderr(tb)
fa9f30b8 887 if not is_error:
888 return
b1940459 889 if not self.params.get('ignoreerrors'):
8222d8de
JMF
890 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
891 exc_info = sys.exc_info()[1].exc_info
892 else:
893 exc_info = sys.exc_info()
894 raise DownloadError(message, exc_info)
895 self._download_retcode = 1
896
ec11a9f4 897 class Styles(Enum):
898 HEADERS = 'yellow'
f304da8a 899 EMPHASIS = 'light blue'
ec11a9f4 900 ID = 'green'
901 DELIM = 'blue'
902 ERROR = 'red'
903 WARNING = 'yellow'
ff51ed58 904 SUPPRESS = 'light black'
ec11a9f4 905
7578d77d 906 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
ec11a9f4 907 if test_encoding:
908 original_text = text
5c104538 909 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
910 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 911 text = text.encode(encoding, 'ignore').decode(encoding)
912 if fallback is not None and text != original_text:
913 text = fallback
914 if isinstance(f, self.Styles):
f304da8a 915 f = f.value
7578d77d 916 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 917
918 def _format_screen(self, *args, **kwargs):
7578d77d 919 return self._format_text(
cf4f42cb 920 self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
ec11a9f4 921
922 def _format_err(self, *args, **kwargs):
7578d77d 923 return self._format_text(
cf4f42cb 924 self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
819e0531 925
c84aeac6 926 def report_warning(self, message, only_once=False):
8222d8de
JMF
927 '''
928 Print the message to stderr; it will be prefixed with 'WARNING:'
929 If stderr is a tty file the 'WARNING:' will be colored
930 '''
6d07ce01
JMF
931 if self.params.get('logger') is not None:
932 self.params['logger'].warning(message)
8222d8de 933 else:
ad8915b7
PH
934 if self.params.get('no_warnings'):
935 return
ec11a9f4 936 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 937
ee8dd27a 938 def deprecation_warning(self, message):
939 if self.params.get('logger') is not None:
a44ca5a4 940 self.params['logger'].warning(f'DeprecationWarning: {message}')
ee8dd27a 941 else:
942 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
943
fa9f30b8 944 def report_error(self, message, *args, **kwargs):
8222d8de
JMF
945 '''
946 Do the same as trouble, but prefixes the message with 'ERROR:', colored
947 in red if stderr is a tty file.
948 '''
fa9f30b8 949 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 950
b35496d8 951 def write_debug(self, message, only_once=False):
0760b0a7 952 '''Log debug message or Print message to stderr'''
953 if not self.params.get('verbose', False):
954 return
955 message = '[debug] %s' % message
956 if self.params.get('logger'):
957 self.params['logger'].debug(message)
958 else:
b35496d8 959 self.to_stderr(message, only_once)
0760b0a7 960
8222d8de
JMF
961 def report_file_already_downloaded(self, file_name):
962 """Report file has already been fully downloaded."""
963 try:
6febd1c1 964 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 965 except UnicodeEncodeError:
6febd1c1 966 self.to_screen('[download] The file has already been downloaded')
8222d8de 967
0c3d0f51 968 def report_file_delete(self, file_name):
969 """Report that existing file will be deleted."""
970 try:
c25228e5 971 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 972 except UnicodeEncodeError:
c25228e5 973 self.to_screen('Deleting existing file')
0c3d0f51 974
319b6059 975 def raise_no_formats(self, info, forced=False, *, msg=None):
1151c407 976 has_drm = info.get('__has_drm')
319b6059 977 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
978 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
979 if forced or not ignored:
1151c407 980 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 981 expected=has_drm or ignored or expected)
88acdbc2 982 else:
983 self.report_warning(msg)
984
de6000d9 985 def parse_outtmpl(self):
986 outtmpl_dict = self.params.get('outtmpl', {})
987 if not isinstance(outtmpl_dict, dict):
988 outtmpl_dict = {'default': outtmpl_dict}
71ce444a 989 # Remove spaces in the default template
990 if self.params.get('restrictfilenames'):
991 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
992 else:
993 sanitize = lambda x: x
de6000d9 994 outtmpl_dict.update({
71ce444a 995 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 996 if outtmpl_dict.get(k) is None})
de6000d9 997 for key, val in outtmpl_dict.items():
998 if isinstance(val, bytes):
999 self.report_warning(
1000 'Parameter outtmpl is bytes, but should be a unicode string. '
1001 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
1002 return outtmpl_dict
1003
21cd8fae 1004 def get_output_path(self, dir_type='', filename=None):
1005 paths = self.params.get('paths', {})
1006 assert isinstance(paths, dict)
1007 path = os.path.join(
1008 expand_path(paths.get('home', '').strip()),
1009 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1010 filename or '')
1011
1012 # Temporary fix for #4787
1013 # 'Treat' all problem characters by passing filename through preferredencoding
1014 # to work around encoding issues with subprocess on python2 @ Windows
1015 if sys.version_info < (3, 0) and sys.platform == 'win32':
1016 path = encodeFilename(path, True).decode(preferredencoding())
1017 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1018
76a264ac 1019 @staticmethod
901130bb 1020 def _outtmpl_expandpath(outtmpl):
1021 # expand_path translates '%%' into '%' and '$$' into '$'
1022 # correspondingly that is not what we want since we need to keep
1023 # '%%' intact for template dict substitution step. Working around
1024 # with boundary-alike separator hack.
1025 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1026 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1027
1028 # outtmpl should be expand_path'ed before template dict substitution
1029 # because meta fields may contain env variables we don't want to
1030 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1031 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1032 return expand_path(outtmpl).replace(sep, '')
1033
1034 @staticmethod
1035 def escape_outtmpl(outtmpl):
1036 ''' Escape any remaining strings like %s, %abc% etc. '''
1037 return re.sub(
1038 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1039 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1040 outtmpl)
1041
1042 @classmethod
1043 def validate_outtmpl(cls, outtmpl):
76a264ac 1044 ''' @return None or Exception object '''
7d1eb38a 1045 outtmpl = re.sub(
37893bb0 1046 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
7d1eb38a 1047 lambda mobj: f'{mobj.group(0)[:-1]}s',
1048 cls._outtmpl_expandpath(outtmpl))
76a264ac 1049 try:
7d1eb38a 1050 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1051 return None
1052 except ValueError as err:
1053 return err
1054
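    A usage sketch for the validator above:

        from yt_dlp import YoutubeDL

        # validate_outtmpl() returns None for a valid template,
        # or the offending exception object otherwise
        assert YoutubeDL.validate_outtmpl('%(title)s [%(id)s].%(ext)s') is None
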
03b4de72 1055 @staticmethod
1056 def _copy_infodict(info_dict):
1057 info_dict = dict(info_dict)
09b49e1f 1058 info_dict.pop('__postprocessors', None)
03b4de72 1059 return info_dict
1060
e0fd9573 1061 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1062 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1063 @param sanitize Whether to sanitize the output as a filename.
1064 For backward compatibility, a function can also be passed
1065 """
1066
6e84b215 1067 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1068
03b4de72 1069 info_dict = self._copy_infodict(info_dict)
752cda38 1070 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1071 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1072 if info_dict.get('duration', None) is not None
1073 else None)
752cda38 1074 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
9c906919 1075 info_dict['video_autonumber'] = self._num_videos
752cda38 1076 if info_dict.get('resolution') is None:
1077 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1078
e6f21b3d 1079 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1080 # of %(field)s to %(field)0Nd for backward compatibility
1081 field_size_compat_map = {
ec11a9f4 1082 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1083 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1084 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1085 }
752cda38 1086
385a27fa 1087 TMPL_DICT = {}
37893bb0 1088 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
385a27fa 1089 MATH_FUNCTIONS = {
1090 '+': float.__add__,
1091 '-': float.__sub__,
1092 }
e625be0d 1093 # Field is of the form key1.key2...
1094 # where keys (except first) can be string, int or slice
2b8a2973 1095 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
e0fd9573 1096 MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
385a27fa 1097 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 1098 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1099 (?P<negate>-)?
385a27fa 1100 (?P<fields>{field})
1101 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 1102 (?:>(?P<strf_format>.+?))?
34baa9fd 1103 (?P<remaining>
1104 (?P<alternate>(?<!\\),[^|&)]+)?
1105 (?:&(?P<replacement>.*?))?
1106 (?:\|(?P<default>.*?))?
1107 )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
752cda38 1108
2b8a2973 1109 def _traverse_infodict(k):
1110 k = k.split('.')
1111 if k[0] == '':
1112 k.pop(0)
1113 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1114
752cda38 1115 def get_value(mdict):
1116 # Object traversal
2b8a2973 1117 value = _traverse_infodict(mdict['fields'])
752cda38 1118 # Negative
1119 if mdict['negate']:
1120 value = float_or_none(value)
1121 if value is not None:
1122 value *= -1
1123 # Do maths
385a27fa 1124 offset_key = mdict['maths']
1125 if offset_key:
752cda38 1126 value = float_or_none(value)
1127 operator = None
385a27fa 1128 while offset_key:
1129 item = re.match(
1130 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1131 offset_key).group(0)
1132 offset_key = offset_key[len(item):]
1133 if operator is None:
752cda38 1134 operator = MATH_FUNCTIONS[item]
385a27fa 1135 continue
1136 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1137 offset = float_or_none(item)
1138 if offset is None:
2b8a2973 1139 offset = float_or_none(_traverse_infodict(item))
385a27fa 1140 try:
1141 value = operator(value, multiplier * offset)
1142 except (TypeError, ZeroDivisionError):
1143 return None
1144 operator = None
752cda38 1145 # Datetime formatting
1146 if mdict['strf_format']:
7c37ff97 1147 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1148
1149 return value
1150
b868936c 1151 na = self.params.get('outtmpl_na_placeholder', 'NA')
1152
e0fd9573 1153 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1154 return sanitize_filename(str(value), restricted=restricted, is_id=(
1155 bool(re.search(r'(^|[_.])id(\.|$)', key))
1156 if 'filename-sanitization' in self.params.get('compat_opts', [])
1157 else NO_DEFAULT))
e0fd9573 1158
1159 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1160 sanitize = bool(sanitize)
1161
6e84b215 1162 def _dumpjson_default(obj):
1163 if isinstance(obj, (set, LazyList)):
1164 return list(obj)
adbc4ec4 1165 return repr(obj)
6e84b215 1166
752cda38 1167 def create_key(outer_mobj):
1168 if not outer_mobj.group('has_key'):
b836dc94 1169 return outer_mobj.group(0)
752cda38 1170 key = outer_mobj.group('key')
752cda38 1171 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1172 initial_field = mobj.group('fields') if mobj else ''
e978789f 1173 value, replacement, default = None, None, na
7c37ff97 1174 while mobj:
e625be0d 1175 mobj = mobj.groupdict()
7c37ff97 1176 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1177 value = get_value(mobj)
e978789f 1178 replacement = mobj['replacement']
7c37ff97 1179 if value is None and mobj['alternate']:
34baa9fd 1180 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1181 else:
1182 break
752cda38 1183
b868936c 1184 fmt = outer_mobj.group('format')
752cda38 1185 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1186 fmt = '0{:d}d'.format(field_size_compat_map[key])
1187
e978789f 1188 value = default if value is None else value if replacement is None else replacement
752cda38 1189
4476d2c7 1190 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1191 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1192 if fmt[-1] == 'l': # list
4476d2c7 1193 delim = '\n' if '#' in flags else ', '
9e907ebd 1194 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1195 elif fmt[-1] == 'j': # json
4476d2c7 1196 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1197 elif fmt[-1] == 'q': # quoted
4476d2c7 1198 value = map(str, variadic(value) if '#' in flags else [value])
1199 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1200 elif fmt[-1] == 'B': # bytes
f5aa5cfb 1201 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1202 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1203 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1204 value, fmt = unicodedata.normalize(
1205 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1206 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1207 value), str_fmt
e0fd9573 1208 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1209 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1210 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1211 factor=1024 if '#' in flags else 1000)
37893bb0 1212 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1213 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1214 elif fmt[-1] == 'c':
524e2e4f 1215 if value:
1216 value = str(value)[0]
76a264ac 1217 else:
524e2e4f 1218 fmt = str_fmt
76a264ac 1219 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1220 value = float_or_none(value)
752cda38 1221 if value is None:
1222 value, fmt = default, 's'
901130bb 1223
752cda38 1224 if sanitize:
1225 if fmt[-1] == 'r':
1226 # If value is an object, sanitize might convert it to a string
1227 # So we convert it to repr first
7d1eb38a 1228 value, fmt = repr(value), str_fmt
639f1cea 1229 if fmt[-1] in 'csr':
e0fd9573 1230 value = sanitizer(initial_field, value)
901130bb 1231
b868936c 1232 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1233 TMPL_DICT[key] = value
b868936c 1234 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1235
385a27fa 1236 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1237
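# Illustration of the template machinery: evaluate_outtmpl(
# '%(title)s [%(id)s].%(ext)s', info_dict) substitutes fields from info_dict;
# conversions such as %(tags)j (JSON), %(title)q (shell-quoted),
# %(filesize)D (decimal suffix) and %(title)S (sanitized filename)
# are implemented by create_key above.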
819e0531 1238 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1239 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1240 return self.escape_outtmpl(outtmpl) % info_dict
1241
5127e92a 1242 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1243 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1244 if outtmpl is None:
1245 outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
8222d8de 1246 try:
5127e92a 1247 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1248 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1249 if not filename:
1250 return None
15da37c7 1251
5127e92a 1252 if tmpl_type in ('', 'temp'):
6a0546e3 1253 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1254 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1255 filename = replace_extension(filename, ext, final_ext)
5127e92a 1256 elif tmpl_type:
6a0546e3 1257 force_ext = OUTTMPL_TYPES[tmpl_type]
1258 if force_ext:
1259 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1260
bdc3fd2f
U
1261 # https://github.com/blackjack4494/youtube-dlc/issues/85
1262 trim_file_name = self.params.get('trim_file_name', False)
1263 if trim_file_name:
5c22c63d 1264 no_ext, *ext = filename.rsplit('.', 2)
1265 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1266
0202b52a 1267 return filename
8222d8de 1268 except ValueError as err:
6febd1c1 1269 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1270 return None
1271
5127e92a 1272 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1273 """Generate the output filename"""
1274 if outtmpl:
1275 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1276 dir_type = None
1277 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1278 if not filename and dir_type not in ('', 'temp'):
1279 return ''
de6000d9 1280
c84aeac6 1281 if warn:
21cd8fae 1282 if not self.params.get('paths'):
de6000d9 1283 pass
1284 elif filename == '-':
c84aeac6 1285 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1286 elif os.path.isabs(filename):
c84aeac6 1287 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1288 if filename == '-' or not filename:
1289 return filename
1290
21cd8fae 1291 return self.get_output_path(dir_type, filename)
0202b52a 1292
120fe513 1293 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1294 """ Returns None if the file should be downloaded """
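# Otherwise a human-readable reason string is returned (e.g.
# '"Some title" title did not match pattern "foo"'), and ExistingVideoReached/
# RejectedVideoReached may be raised when the matching break_on_* option is set.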
8222d8de 1295
c77495e3 1296 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1297
8b0d7497 1298 def check_filter():
8b0d7497 1299 if 'title' in info_dict:
1300 # This can happen when we're just evaluating the playlist
1301 title = info_dict['title']
1302 matchtitle = self.params.get('matchtitle', False)
1303 if matchtitle:
1304 if not re.search(matchtitle, title, re.IGNORECASE):
1305 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1306 rejecttitle = self.params.get('rejecttitle', False)
1307 if rejecttitle:
1308 if re.search(rejecttitle, title, re.IGNORECASE):
1309 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1310 date = info_dict.get('upload_date')
1311 if date is not None:
1312 dateRange = self.params.get('daterange', DateRange())
1313 if date not in dateRange:
1314 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1315 view_count = info_dict.get('view_count')
1316 if view_count is not None:
1317 min_views = self.params.get('min_views')
1318 if min_views is not None and view_count < min_views:
1319 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1320 max_views = self.params.get('max_views')
1321 if max_views is not None and view_count > max_views:
1322 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1323 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1324 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1325
8f18aca8 1326 match_filter = self.params.get('match_filter')
1327 if match_filter is not None:
1328 try:
1329 ret = match_filter(info_dict, incomplete=incomplete)
1330 except TypeError:
1331 # For backward compatibility
1332 ret = None if incomplete else match_filter(info_dict)
1333 if ret is not None:
1334 return ret
8b0d7497 1335 return None
1336
c77495e3 1337 if self.in_download_archive(info_dict):
1338 reason = '%s has already been recorded in the archive' % video_title
1339 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1340 else:
1341 reason = check_filter()
1342 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1343 if reason is not None:
120fe513 1344 if not silent:
1345 self.to_screen('[download] ' + reason)
c77495e3 1346 if self.params.get(break_opt, False):
1347 raise break_err()
8b0d7497 1348 return reason
fe7e0c98 1349
b6c45014
JMF
1350 @staticmethod
1351 def add_extra_info(info_dict, extra_info):
1352 '''Set the keys from extra_info in info dict if they are missing'''
1353 for key, value in extra_info.items():
1354 info_dict.setdefault(key, value)
1355
409e1828 1356 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1357 process=True, force_generic_extractor=False):
41d1cca3 1358 """
1359 Extract and return the information dictionary of the URL.
1360
1361 Arguments:
1362 url -- URL to extract
1363
1364 Keyword arguments:
1365 download -- whether to download videos during extraction
1366 ie_key -- extractor key hint
1367 extra_info -- dictionary containing the extra values to add to each result
1368 process -- whether to resolve all unresolved references (URLs, playlist items),
1369 must be True for download to work.
1370 force_generic_extractor -- force using the generic extractor
1371 """
fe7e0c98 1372
409e1828 1373 if extra_info is None:
1374 extra_info = {}
1375
61aa5ba3 1376 if not ie_key and force_generic_extractor:
d22dec74
S
1377 ie_key = 'Generic'
1378
8222d8de 1379 if ie_key:
8b7491c8 1380 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1381 else:
1382 ies = self._ies
1383
8b7491c8 1384 for ie_key, ie in ies.items():
8222d8de
JMF
1385 if not ie.suitable(url):
1386 continue
1387
1388 if not ie.working():
6febd1c1
PH
1389 self.report_warning('The program functionality for this site has been marked as broken, '
1390 'and will probably not work.')
8222d8de 1391
1151c407 1392 temp_id = ie.get_temp_id(url)
a0566bbf 1393 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1394 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1395 if self.params.get('break_on_existing', False):
1396 raise ExistingVideoReached()
a0566bbf 1397 break
8b7491c8 1398 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1399 else:
1400 self.report_error('no suitable InfoExtractor for URL %s' % url)
1401
8e5fecc8 1402 def __handle_extraction_exceptions(func):
b5ae35ee 1403 @functools.wraps(func)
a0566bbf 1404 def wrapper(self, *args, **kwargs):
6da22e7d 1405 while True:
1406 try:
1407 return func(self, *args, **kwargs)
1408 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1409 raise
6da22e7d 1410 except ReExtractInfo as e:
1411 if e.expected:
1412 self.to_screen(f'{e}; Re-extracting data')
1413 else:
1414 self.to_stderr('\r')
1415 self.report_warning(f'{e}; Re-extracting data')
1416 continue
1417 except GeoRestrictedError as e:
1418 msg = e.msg
1419 if e.countries:
1420 msg += '\nThis video is available in %s.' % ', '.join(
1421 map(ISO3166Utils.short2full, e.countries))
1422 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1423 self.report_error(msg)
1424 except ExtractorError as e: # An error we somewhat expected
1425 self.report_error(str(e), e.format_traceback())
1426 except Exception as e:
1427 if self.params.get('ignoreerrors'):
1428 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1429 else:
1430 raise
1431 break
a0566bbf 1432 return wrapper
1433
f2ebc5c7 1434 def _wait_for_video(self, ie_result):
1435 if (not self.params.get('wait_for_video')
1436 or ie_result.get('_type', 'video') != 'video'
1437 or ie_result.get('formats') or ie_result.get('url')):
1438 return
1439
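# self.params['wait_for_video'] is the (min_secs, max_secs) tuple set by
# --wait-for-video, e.g. (60, 600); the wait is derived from
# release_timestamp when available and clamped into that range below.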
1440 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1441 last_msg = ''
1442
1443 def progress(msg):
1444 nonlocal last_msg
1445 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1446 last_msg = msg
1447
1448 min_wait, max_wait = self.params.get('wait_for_video')
1449 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1450 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1451 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1452 self.report_warning('Release time of video is not known')
1453 elif (diff or 0) <= 0:
1454 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1455 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1456 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1457
1458 wait_till = time.time() + diff
1459 try:
1460 while True:
1461 diff = wait_till - time.time()
1462 if diff <= 0:
1463 progress('')
1464 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1465 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1466 time.sleep(1)
1467 except KeyboardInterrupt:
1468 progress('')
1469 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1470 except BaseException as e:
1471 if not isinstance(e, ReExtractInfo):
1472 self.to_screen('')
1473 raise
1474
a0566bbf 1475 @__handle_extraction_exceptions
58f197b7 1476 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1477 ie_result = ie.extract(url)
1478 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1479 return
1480 if isinstance(ie_result, list):
1481 # Backwards compatibility: old IE result format
1482 ie_result = {
1483 '_type': 'compat_list',
1484 'entries': ie_result,
1485 }
e37d0efb 1486 if extra_info.get('original_url'):
1487 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1488 self.add_default_extra_info(ie_result, ie, url)
1489 if process:
f2ebc5c7 1490 self._wait_for_video(ie_result)
a0566bbf 1491 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1492 else:
a0566bbf 1493 return ie_result
fe7e0c98 1494
ea38e55f 1495 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1496 if url is not None:
1497 self.add_extra_info(ie_result, {
1498 'webpage_url': url,
1499 'original_url': url,
57ebfca3 1500 })
1501 webpage_url = ie_result.get('webpage_url')
1502 if webpage_url:
1503 self.add_extra_info(ie_result, {
1504 'webpage_url_basename': url_basename(webpage_url),
1505 'webpage_url_domain': get_domain(webpage_url),
6033d980 1506 })
1507 if ie is not None:
1508 self.add_extra_info(ie_result, {
1509 'extractor': ie.IE_NAME,
1510 'extractor_key': ie.ie_key(),
1511 })
ea38e55f 1512
58adec46 1513 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1514 """
1515 Take the result of the ie (may be modified) and resolve all unresolved
1516 references (URLs, playlist items).
1517
1518 It will also download the videos if 'download' is true.
1519 Returns the resolved ie_result.
1520 """
58adec46 1521 if extra_info is None:
1522 extra_info = {}
e8ee972c
PH
1523 result_type = ie_result.get('_type', 'video')
1524
057a5206 1525 if result_type in ('url', 'url_transparent'):
134c6ea8 1526 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1527 if ie_result.get('original_url'):
1528 extra_info.setdefault('original_url', ie_result['original_url'])
1529
057a5206 1530 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1531 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1532 or extract_flat is True):
ecb54191 1533 info_copy = ie_result.copy()
6033d980 1534 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1535 if ie and not ie_result.get('id'):
4614bc22 1536 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1537 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1538 self.add_extra_info(info_copy, extra_info)
b5475f11 1539 info_copy, _ = self.pre_process(info_copy)
ecb54191 1540 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1541 if self.params.get('force_write_download_archive', False):
1542 self.record_download_archive(info_copy)
e8ee972c
PH
1543 return ie_result
1544
8222d8de 1545 if result_type == 'video':
b6c45014 1546 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1547 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1548 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1549 if additional_urls:
e9f4ccd1 1550 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1551 if isinstance(additional_urls, compat_str):
1552 additional_urls = [additional_urls]
1553 self.to_screen(
1554 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1555 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1556 ie_result['additional_entries'] = [
1557 self.extract_info(
b69fd25c 1558 url, download, extra_info=extra_info,
9c2b75b5 1559 force_generic_extractor=self.params.get('force_generic_extractor'))
1560 for url in additional_urls
1561 ]
1562 return ie_result
8222d8de
JMF
1563 elif result_type == 'url':
1564 # We have to add extra_info to the results because it may be
1565 # contained in a playlist
07cce701 1566 return self.extract_info(
1567 ie_result['url'], download,
1568 ie_key=ie_result.get('ie_key'),
1569 extra_info=extra_info)
7fc3fa05
PH
1570 elif result_type == 'url_transparent':
1571 # Use the information from the embedding page
1572 info = self.extract_info(
1573 ie_result['url'], ie_key=ie_result.get('ie_key'),
1574 extra_info=extra_info, download=False, process=False)
1575
1640eb09
S
1576 # extract_info may return None when ignoreerrors is enabled and
1577 # extraction failed with an error, don't crash and return early
1578 # in this case
1579 if not info:
1580 return info
1581
412c617d 1582 new_result = info.copy()
90137ca4 1583 new_result.update(filter_dict(ie_result, lambda k, v: (
1584 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
7fc3fa05 1585
0563f7ac
S
1586 # Extracted info may not be a video result (i.e.
1587 # info.get('_type', 'video') != 'video') but rather a 'url' or
1588 # url_transparent. In such cases outer metadata (from ie_result)
1589 # should be propagated to inner one (info). For this to happen
1590 # _type of info should be overridden with url_transparent. This
067aa17e 1591 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1592 if new_result.get('_type') == 'url':
1593 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1594
1595 return self.process_ie_result(
1596 new_result, download=download, extra_info=extra_info)
40fcba5e 1597 elif result_type in ('playlist', 'multi_video'):
30a074c2 1598 # Protect from infinite recursion due to recursively nested playlists
1599 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1600 webpage_url = ie_result['webpage_url']
1601 if webpage_url in self._playlist_urls:
7e85e872 1602 self.to_screen(
30a074c2 1603 '[download] Skipping already downloaded playlist: %s'
1604 % (ie_result.get('title') or ie_result.get('id')))
1605 return
7e85e872 1606
30a074c2 1607 self._playlist_level += 1
1608 self._playlist_urls.add(webpage_url)
03f83004 1609 self._fill_common_fields(ie_result, False)
bc516a3f 1610 self._sanitize_thumbnails(ie_result)
30a074c2 1611 try:
1612 return self.__process_playlist(ie_result, download)
1613 finally:
1614 self._playlist_level -= 1
1615 if not self._playlist_level:
1616 self._playlist_urls.clear()
8222d8de 1617 elif result_type == 'compat_list':
c9bf4114
PH
1618 self.report_warning(
1619 'Extractor %s returned a compat_list result. '
1620 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1621
8222d8de 1622 def _fixup(r):
b868936c 1623 self.add_extra_info(r, {
1624 'extractor': ie_result['extractor'],
1625 'webpage_url': ie_result['webpage_url'],
1626 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1627 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1628 'extractor_key': ie_result['extractor_key'],
1629 })
8222d8de
JMF
1630 return r
1631 ie_result['entries'] = [
b6c45014 1632 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1633 for r in ie_result['entries']
1634 ]
1635 return ie_result
1636 else:
1637 raise Exception('Invalid result type: %s' % result_type)
1638
e92caff5 1639 def _ensure_dir_exists(self, path):
1640 return make_dir(path, self.report_error)
1641
3b603dbd 1642 @staticmethod
1643 def _playlist_infodict(ie_result, **kwargs):
1644 return {
1645 **ie_result,
1646 'playlist': ie_result.get('title') or ie_result.get('id'),
1647 'playlist_id': ie_result.get('id'),
1648 'playlist_title': ie_result.get('title'),
1649 'playlist_uploader': ie_result.get('uploader'),
1650 'playlist_uploader_id': ie_result.get('uploader_id'),
1651 'playlist_index': 0,
1652 **kwargs,
1653 }
1654
30a074c2 1655 def __process_playlist(self, ie_result, download):
1656 # We process each entry in the playlist
1657 playlist = ie_result.get('title') or ie_result.get('id')
1658 self.to_screen('[download] Downloading playlist: %s' % playlist)
1659
498f5606 1660 if 'entries' not in ie_result:
aa9369a2 1661 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1662
1663 MissingEntry = object()
498f5606 1664 incomplete_entries = bool(ie_result.get('requested_entries'))
1665 if incomplete_entries:
bf5f605e 1666 def fill_missing_entries(entries, indices):
7c7f7161 1667 ret = [MissingEntry] * max(indices)
bf5f605e 1668 for i, entry in zip(indices, entries):
498f5606 1669 ret[i - 1] = entry
1670 return ret
1671 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1672
30a074c2 1673 playlist_results = []
1674
56a8fb4f 1675 playliststart = self.params.get('playliststart', 1)
30a074c2 1676 playlistend = self.params.get('playlistend')
1677 # For backwards compatibility, interpret -1 as whole list
1678 if playlistend == -1:
1679 playlistend = None
1680
1681 playlistitems_str = self.params.get('playlist_items')
1682 playlistitems = None
1683 if playlistitems_str is not None:
1684 def iter_playlistitems(format):
1685 for string_segment in format.split(','):
1686 if '-' in string_segment:
1687 start, end = string_segment.split('-')
1688 for item in range(int(start), int(end) + 1):
1689 yield int(item)
1690 else:
1691 yield int(string_segment)
1692 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
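# e.g. --playlist-items '1-3,7,10-13' yields [1, 2, 3, 7, 10, 11, 12, 13]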
1693
1694 ie_entries = ie_result['entries']
8e5fecc8 1695 if isinstance(ie_entries, list):
ed8d87f9 1696 playlist_count = len(ie_entries)
f0d785d3 1697 msg = f'Collected {playlist_count} videos; downloading %d of them'
1698 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1699
8e5fecc8 1700 def get_entry(i):
1701 return ie_entries[i - 1]
1702 else:
f0d785d3 1703 msg = 'Downloading %d videos'
c586f9e8 1704 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1705 ie_entries = LazyList(ie_entries)
d37707bd 1706 elif isinstance(ie_entries, InAdvancePagedList):
1707 if ie_entries._pagesize == 1:
1708 playlist_count = ie_entries._pagecount
8e5fecc8 1709
1710 def get_entry(i):
1711 return YoutubeDL.__handle_extraction_exceptions(
1712 lambda self, i: ie_entries[i - 1]
1713 )(self, i)
50fed816 1714
f0d785d3 1715 entries, broken = [], False
ff1c7fc9 1716 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1717 for i in items:
1718 if i == 0:
1719 continue
56a8fb4f 1720 if playlistitems is None and playlistend is not None and playlistend < i:
1721 break
1722 entry = None
1723 try:
50fed816 1724 entry = get_entry(i)
7c7f7161 1725 if entry is MissingEntry:
498f5606 1726 raise EntryNotInPlaylist()
56a8fb4f 1727 except (IndexError, EntryNotInPlaylist):
1728 if incomplete_entries:
aa9369a2 1729 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1730 elif not playlistitems:
1731 break
1732 entries.append(entry)
120fe513 1733 try:
1734 if entry is not None:
1735 self._match_entry(entry, incomplete=True, silent=True)
1736 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1737 broken = True
120fe513 1738 break
56a8fb4f 1739 ie_result['entries'] = entries
30a074c2 1740
56a8fb4f 1741 # Save playlist_index before re-ordering
1742 entries = [
9e598870 1743 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1744 for i, entry in enumerate(entries, 1)
1745 if entry is not None]
1746 n_entries = len(entries)
498f5606 1747
f0d785d3 1748 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1749 ie_result['playlist_count'] = n_entries
1750
e08a85d8 1751 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1752 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1753 ie_result['requested_entries'] = playlistitems
1754
e08a85d8 1755 _infojson_written = False
0bfc53d0 1756 write_playlist_files = self.params.get('allow_playlist_files', True)
1757 if write_playlist_files and self.params.get('list_thumbnails'):
1758 self.list_thumbnails(ie_result)
1759 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1760 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1761 _infojson_written = self._write_info_json(
1762 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1763 if _infojson_written is None:
80c03fa9 1764 return
1765 if self._write_description('playlist', ie_result,
1766 self.prepare_filename(ie_copy, 'pl_description')) is None:
1767 return
681de68e 1768 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1769 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1770
1771 if self.params.get('playlistreverse', False):
1772 entries = entries[::-1]
30a074c2 1773 if self.params.get('playlistrandom', False):
1774 random.shuffle(entries)
1775
1776 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1777
56a8fb4f 1778 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1779 failures = 0
1780 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1781 for i, entry_tuple in enumerate(entries, 1):
1782 playlist_index, entry = entry_tuple
81139999 1783 if 'playlist-index' in self.params.get('compat_opts', []):
1784 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1785 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1786 # This __x_forwarded_for_ip thing is a bit ugly but requires
1787 # minimal changes
1788 if x_forwarded_for:
1789 entry['__x_forwarded_for_ip'] = x_forwarded_for
1790 extra = {
1791 'n_entries': n_entries,
f59ae581 1792 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1793 'playlist_count': ie_result.get('playlist_count'),
71729754 1794 'playlist_index': playlist_index,
1795 'playlist_autonumber': i,
30a074c2 1796 'playlist': playlist,
1797 'playlist_id': ie_result.get('id'),
1798 'playlist_title': ie_result.get('title'),
1799 'playlist_uploader': ie_result.get('uploader'),
1800 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1801 'extractor': ie_result['extractor'],
1802 'webpage_url': ie_result['webpage_url'],
1803 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1804 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1805 'extractor_key': ie_result['extractor_key'],
1806 }
1807
1808 if self._match_entry(entry, incomplete=True) is not None:
1809 continue
1810
1811 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1812 if not entry_result:
1813 failures += 1
1814 if failures >= max_failures:
1815 self.report_error(
1816 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1817 break
30a074c2 1818 playlist_results.append(entry_result)
1819 ie_result['entries'] = playlist_results
e08a85d8 1820
1821 # Write the updated info to json
cb96c5be 1822 if _infojson_written is True and self._write_info_json(
e08a85d8 1823 'updated playlist', ie_result,
1824 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1825 return
ca30f449 1826
ed5835b4 1827 ie_result = self.run_all_pps('playlist', ie_result)
1828 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1829 return ie_result
1830
a0566bbf 1831 @__handle_extraction_exceptions
1832 def __process_iterable_entry(self, entry, download, extra_info):
1833 return self.process_ie_result(
1834 entry, download=download, extra_info=extra_info)
1835
67134eab
JMF
1836 def _build_format_filter(self, filter_spec):
1837 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1838
1839 OPERATORS = {
1840 '<': operator.lt,
1841 '<=': operator.le,
1842 '>': operator.gt,
1843 '>=': operator.ge,
1844 '=': operator.eq,
1845 '!=': operator.ne,
1846 }
67134eab 1847 operator_rex = re.compile(r'''(?x)\s*
187986a8 1848 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1849 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1850 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1851 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1852 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1853 if m:
1854 try:
1855 comparison_value = int(m.group('value'))
1856 except ValueError:
1857 comparison_value = parse_filesize(m.group('value'))
1858 if comparison_value is None:
1859 comparison_value = parse_filesize(m.group('value') + 'B')
1860 if comparison_value is None:
1861 raise ValueError(
1862 'Invalid value %r in format specification %r' % (
67134eab 1863 m.group('value'), filter_spec))
9ddb6925
S
1864 op = OPERATORS[m.group('op')]
1865
083c9df9 1866 if not m:
9ddb6925
S
1867 STR_OPERATORS = {
1868 '=': operator.eq,
10d33b34
YCH
1869 '^=': lambda attr, value: attr.startswith(value),
1870 '$=': lambda attr, value: attr.endswith(value),
1871 '*=': lambda attr, value: value in attr,
1ce9a3cb 1872 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1873 }
187986a8 1874 str_operator_rex = re.compile(r'''(?x)\s*
1875 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1876 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1877 (?P<quote>["'])?
1878 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1879 (?(quote)(?P=quote))\s*
9ddb6925 1880 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1881 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1882 if m:
1ce9a3cb
LF
1883 if m.group('op') == '~=':
1884 comparison_value = re.compile(m.group('value'))
1885 else:
1886 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1887 str_op = STR_OPERATORS[m.group('op')]
1888 if m.group('negation'):
e118a879 1889 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1890 else:
1891 op = str_op
083c9df9 1892
9ddb6925 1893 if not m:
187986a8 1894 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1895
1896 def _filter(f):
1897 actual_value = f.get(m.group('key'))
1898 if actual_value is None:
1899 return m.group('none_inclusive')
1900 return op(actual_value, comparison_value)
67134eab
JMF
1901 return _filter
1902
9f1a1c36 1903 def _check_formats(self, formats):
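# Lazily yields only the formats that can actually be downloaded, verified by
# fetching a small test portion of each format into a temporary file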
1904 for f in formats:
1905 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1906 path = self.get_output_path('temp')
1907 if not self._ensure_dir_exists(f'{path}/'):
1908 continue
1909 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1910 temp_file.close()
1911 try:
1912 success, _ = self.dl(temp_file.name, f, test=True)
1913 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1914 success = False
1915 finally:
1916 if os.path.exists(temp_file.name):
1917 try:
1918 os.remove(temp_file.name)
1919 except OSError:
1920 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1921 if success:
1922 yield f
1923 else:
1924 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1925
0017d9ad 1926 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1927
af0f7428
S
1928 def can_merge():
1929 merger = FFmpegMergerPP(self)
1930 return merger.available and merger.can_merge()
1931
91ebc640 1932 prefer_best = (
b7b04c78 1933 not self.params.get('simulate')
91ebc640 1934 and download
1935 and (
1936 not can_merge()
19807826 1937 or info_dict.get('is_live', False)
de6000d9 1938 or self.outtmpl_dict['default'] == '-'))
53ed7066 1939 compat = (
1940 prefer_best
1941 or self.params.get('allow_multiple_audio_streams', False)
1942 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1943
1944 return (
53ed7066 1945 'best/bestvideo+bestaudio' if prefer_best
1946 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1947 else 'bestvideo+bestaudio/best')
0017d9ad 1948
67134eab
JMF
1949 def build_format_selector(self, format_spec):
1950 def syntax_error(note, start):
1951 message = (
1952 'Invalid format specification: '
1953 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1954 return SyntaxError(message)
1955
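# format_spec examples: 'best', 'bv*+ba/b', '136+140',
# 'bestvideo[height<=720]+bestaudio' - tokenized below and parsed into nested
# FormatSelector tuples (GROUP, MERGE, PICKFIRST, SINGLE)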
1956 PICKFIRST = 'PICKFIRST'
1957 MERGE = 'MERGE'
1958 SINGLE = 'SINGLE'
0130afb7 1959 GROUP = 'GROUP'
67134eab
JMF
1960 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1961
91ebc640 1962 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1963 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1964
9f1a1c36 1965 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1966
67134eab
JMF
1967 def _parse_filter(tokens):
1968 filter_parts = []
1969 for type, string, start, _, _ in tokens:
1970 if type == tokenize.OP and string == ']':
1971 return ''.join(filter_parts)
1972 else:
1973 filter_parts.append(string)
1974
232541df 1975 def _remove_unused_ops(tokens):
17cc1534 1976 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1977 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1978 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1979 last_string, last_start, last_end, last_line = None, None, None, None
1980 for type, string, start, end, line in tokens:
1981 if type == tokenize.OP and string == '[':
1982 if last_string:
1983 yield tokenize.NAME, last_string, last_start, last_end, last_line
1984 last_string = None
1985 yield type, string, start, end, line
1986 # everything inside brackets will be handled by _parse_filter
1987 for type, string, start, end, line in tokens:
1988 yield type, string, start, end, line
1989 if type == tokenize.OP and string == ']':
1990 break
1991 elif type == tokenize.OP and string in ALLOWED_OPS:
1992 if last_string:
1993 yield tokenize.NAME, last_string, last_start, last_end, last_line
1994 last_string = None
1995 yield type, string, start, end, line
1996 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1997 if not last_string:
1998 last_string = string
1999 last_start = start
2000 last_end = end
2001 else:
2002 last_string += string
2003 if last_string:
2004 yield tokenize.NAME, last_string, last_start, last_end, last_line
2005
cf2ac6df 2006 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2007 selectors = []
2008 current_selector = None
2009 for type, string, start, _, _ in tokens:
2010 # ENCODING is only defined in python 3.x
2011 if type == getattr(tokenize, 'ENCODING', None):
2012 continue
2013 elif type in [tokenize.NAME, tokenize.NUMBER]:
2014 current_selector = FormatSelector(SINGLE, string, [])
2015 elif type == tokenize.OP:
cf2ac6df
JMF
2016 if string == ')':
2017 if not inside_group:
2018 # ')' will be handled by the parentheses group
2019 tokens.restore_last_token()
67134eab 2020 break
cf2ac6df 2021 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2022 tokens.restore_last_token()
2023 break
cf2ac6df
JMF
2024 elif inside_choice and string == ',':
2025 tokens.restore_last_token()
2026 break
2027 elif string == ',':
0a31a350
JMF
2028 if not current_selector:
2029 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2030 selectors.append(current_selector)
2031 current_selector = None
2032 elif string == '/':
d96d604e
JMF
2033 if not current_selector:
2034 raise syntax_error('"/" must follow a format selector', start)
67134eab 2035 first_choice = current_selector
cf2ac6df 2036 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2037 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2038 elif string == '[':
2039 if not current_selector:
2040 current_selector = FormatSelector(SINGLE, 'best', [])
2041 format_filter = _parse_filter(tokens)
2042 current_selector.filters.append(format_filter)
0130afb7
JMF
2043 elif string == '(':
2044 if current_selector:
2045 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2046 group = _parse_format_selection(tokens, inside_group=True)
2047 current_selector = FormatSelector(GROUP, group, [])
67134eab 2048 elif string == '+':
d03cfdce 2049 if not current_selector:
2050 raise syntax_error('Unexpected "+"', start)
2051 selector_1 = current_selector
2052 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2053 if not selector_2:
2054 raise syntax_error('Expected a selector', start)
2055 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
2056 else:
2057 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2058 elif type == tokenize.ENDMARKER:
2059 break
2060 if current_selector:
2061 selectors.append(current_selector)
2062 return selectors
2063
f8d4ad9a 2064 def _merge(formats_pair):
2065 format_1, format_2 = formats_pair
2066
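# e.g. merging a video-only format '137' with an audio-only format '140'
# yields a dict with format_id '137+140', protocol 'https+https' and ext
# taken from merge_output_format (falling back to the video ext, else 'mkv')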
2067 formats_info = []
2068 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2069 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2070
2071 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2072 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2073 for (i, fmt_info) in enumerate(formats_info):
551f9388 2074 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2075 formats_info.pop(i)
2076 continue
2077 for aud_vid in ['audio', 'video']:
f8d4ad9a 2078 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2079 if get_no_more[aud_vid]:
2080 formats_info.pop(i)
f5510afe 2081 break
f8d4ad9a 2082 get_no_more[aud_vid] = True
2083
2084 if len(formats_info) == 1:
2085 return formats_info[0]
2086
2087 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2088 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2089
2090 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2091 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2092
2093 output_ext = self.params.get('merge_output_format')
2094 if not output_ext:
2095 if the_only_video:
2096 output_ext = the_only_video['ext']
2097 elif the_only_audio and not video_fmts:
2098 output_ext = the_only_audio['ext']
2099 else:
2100 output_ext = 'mkv'
2101
975a0d0d 2102 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2103
f8d4ad9a 2104 new_dict = {
2105 'requested_formats': formats_info,
975a0d0d 2106 'format': '+'.join(filtered('format')),
2107 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2108 'ext': output_ext,
975a0d0d 2109 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2110 'language': '+'.join(orderedSet(filtered('language'))) or None,
2111 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2112 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2113 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2114 }
2115
2116 if the_only_video:
2117 new_dict.update({
2118 'width': the_only_video.get('width'),
2119 'height': the_only_video.get('height'),
2120 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2121 'fps': the_only_video.get('fps'),
49a57e70 2122 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2123 'vcodec': the_only_video.get('vcodec'),
2124 'vbr': the_only_video.get('vbr'),
2125 'stretched_ratio': the_only_video.get('stretched_ratio'),
2126 })
2127
2128 if the_only_audio:
2129 new_dict.update({
2130 'acodec': the_only_audio.get('acodec'),
2131 'abr': the_only_audio.get('abr'),
975a0d0d 2132 'asr': the_only_audio.get('asr'),
f8d4ad9a 2133 })
2134
2135 return new_dict
2136
e8e73840 2137 def _check_formats(formats):
981052c9 2138 if not check_formats:
2139 yield from formats
b5ac45b1 2140 return
9f1a1c36 2141 yield from self._check_formats(formats)
e8e73840 2142
67134eab 2143 def _build_selector_function(selector):
909d24dd 2144 if isinstance(selector, list): # ,
67134eab
JMF
2145 fs = [_build_selector_function(s) for s in selector]
2146
317f7ab6 2147 def selector_function(ctx):
67134eab 2148 for f in fs:
981052c9 2149 yield from f(ctx)
67134eab 2150 return selector_function
909d24dd 2151
2152 elif selector.type == GROUP: # ()
0130afb7 2153 selector_function = _build_selector_function(selector.selector)
909d24dd 2154
2155 elif selector.type == PICKFIRST: # /
67134eab
JMF
2156 fs = [_build_selector_function(s) for s in selector.selector]
2157
317f7ab6 2158 def selector_function(ctx):
67134eab 2159 for f in fs:
317f7ab6 2160 picked_formats = list(f(ctx))
67134eab
JMF
2161 if picked_formats:
2162 return picked_formats
2163 return []
67134eab 2164
981052c9 2165 elif selector.type == MERGE: # +
2166 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2167
2168 def selector_function(ctx):
adbc4ec4 2169 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2170 yield _merge(pair)
2171
909d24dd 2172 elif selector.type == SINGLE: # atom
598d185d 2173 format_spec = selector.selector or 'best'
909d24dd 2174
f8d4ad9a 2175 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2176 if format_spec == 'all':
2177 def selector_function(ctx):
9222c381 2178 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2179 elif format_spec == 'mergeall':
2180 def selector_function(ctx):
316f2650 2181 formats = list(_check_formats(
2182 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2183 if not formats:
2184 return
921b76ca 2185 merged_format = formats[-1]
2186 for f in formats[-2::-1]:
f8d4ad9a 2187 merged_format = _merge((merged_format, f))
2188 yield merged_format
909d24dd 2189
2190 else:
85e801a9 2191 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2192 mobj = re.match(
2193 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2194 format_spec)
2195 if mobj is not None:
2196 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2197 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2198 format_type = (mobj.group('type') or [None])[0]
2199 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2200 format_modified = mobj.group('mod') is not None
909d24dd 2201
2202 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2203 _filter_f = (
eff63539 2204 (lambda f: f.get('%scodec' % format_type) != 'none')
2205 if format_type and format_modified # bv*, ba*, wv*, wa*
2206 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2207 if format_type # bv, ba, wv, wa
2208 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2209 if not format_modified # b, w
8326b00a 2210 else lambda f: True) # b*, w*
2211 filter_f = lambda f: _filter_f(f) and (
2212 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2213 else:
48ee10ee 2214 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2215 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2216 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2217 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2218 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2219 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2220 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2221 else:
b5ae35ee 2222 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2223
2224 def selector_function(ctx):
2225 formats = list(ctx['formats'])
909d24dd 2226 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2227 if not matches:
2228 if format_fallback and ctx['incomplete_formats']:
2229 # for extractors with incomplete formats (audio only (soundcloud)
2230 # or video only (imgur)) best/worst will fall back to
2231 # best/worst {video,audio}-only format
2232 matches = formats
2233 elif seperate_fallback and not ctx['has_merged_format']:
2234 # for compatibility with youtube-dl when there is no pre-merged format
2235 matches = list(filter(seperate_fallback, formats))
981052c9 2236 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2237 try:
e8e73840 2238 yield matches[format_idx - 1]
4abea8ca 2239 except LazyList.IndexError:
981052c9 2240 return
083c9df9 2241
67134eab 2242 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2243
317f7ab6 2244 def final_selector(ctx):
adbc4ec4 2245 ctx_copy = dict(ctx)
67134eab 2246 for _filter in filters:
317f7ab6
S
2247 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2248 return selector_function(ctx_copy)
67134eab 2249 return final_selector
083c9df9 2250
67134eab 2251 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2252 try:
232541df 2253 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2254 except tokenize.TokenError:
2255 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2256
2257 class TokenIterator(object):
2258 def __init__(self, tokens):
2259 self.tokens = tokens
2260 self.counter = 0
2261
2262 def __iter__(self):
2263 return self
2264
2265 def __next__(self):
2266 if self.counter >= len(self.tokens):
2267 raise StopIteration()
2268 value = self.tokens[self.counter]
2269 self.counter += 1
2270 return value
2271
2272 next = __next__
2273
2274 def restore_last_token(self):
2275 self.counter -= 1
2276
2277 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2278 return _build_selector_function(parsed_selector)
a9c58ad9 2279
e5660ee6 2280 def _calc_headers(self, info_dict):
8b7539d2 2281 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6
JMF
2282
2283 cookies = self._calc_cookies(info_dict)
2284 if cookies:
2285 res['Cookie'] = cookies
2286
0016b84e
S
2287 if 'X-Forwarded-For' not in res:
2288 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2289 if x_forwarded_for_ip:
2290 res['X-Forwarded-For'] = x_forwarded_for_ip
2291
e5660ee6
JMF
2292 return res
2293
2294 def _calc_cookies(self, info_dict):
5c2266df 2295 pr = sanitized_Request(info_dict['url'])
e5660ee6 2296 self.cookiejar.add_cookie_header(pr)
662435f7 2297 return pr.get_header('Cookie')
e5660ee6 2298
9f1a1c36 2299 def _sort_thumbnails(self, thumbnails):
2300 thumbnails.sort(key=lambda t: (
2301 t.get('preference') if t.get('preference') is not None else -1,
2302 t.get('width') if t.get('width') is not None else -1,
2303 t.get('height') if t.get('height') is not None else -1,
2304 t.get('id') if t.get('id') is not None else '',
2305 t.get('url')))
2306
b0249bca 2307 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2308 thumbnails = info_dict.get('thumbnails')
2309 if thumbnails is None:
2310 thumbnail = info_dict.get('thumbnail')
2311 if thumbnail:
2312 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2313 if not thumbnails:
2314 return
2315
2316 def check_thumbnails(thumbnails):
2317 for t in thumbnails:
2318 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2319 try:
2320 self.urlopen(HEADRequest(t['url']))
2321 except network_exceptions as err:
2322 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2323 continue
2324 yield t
2325
2326 self._sort_thumbnails(thumbnails)
2327 for i, t in enumerate(thumbnails):
2328 if t.get('id') is None:
2329 t['id'] = '%d' % i
2330 if t.get('width') and t.get('height'):
2331 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2332 t['url'] = sanitize_url(t['url'])
2333
2334 if self.params.get('check_formats') is True:
282f5709 2335 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2336 else:
2337 info_dict['thumbnails'] = thumbnails
bc516a3f 2338
03f83004
LNO
2339 def _fill_common_fields(self, info_dict, is_video=True):
2340 # TODO: move sanitization here
2341 if is_video:
2342 # playlists are allowed to lack "title"
2343 info_dict['fulltitle'] = info_dict.get('title')
2344 if 'title' not in info_dict:
2345 raise ExtractorError('Missing "title" field in extractor result',
2346 video_id=info_dict['id'], ie=info_dict['extractor'])
2347 elif not info_dict.get('title'):
2348 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2349 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2350
2351 if info_dict.get('duration') is not None:
2352 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2353
2354 for ts_key, date_key in (
2355 ('timestamp', 'upload_date'),
2356 ('release_timestamp', 'release_date'),
2357 ('modified_timestamp', 'modified_date'),
2358 ):
2359 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2360 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2361 # see http://bugs.python.org/issue1646728)
2362 try:
2363 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2364 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2365 except (ValueError, OverflowError, OSError):
2366 pass
2367
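# e.g. timestamp 1640995200 produces upload_date '20220101' (UTC)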
2368 live_keys = ('is_live', 'was_live')
2369 live_status = info_dict.get('live_status')
2370 if live_status is None:
2371 for key in live_keys:
2372 if info_dict.get(key) is False:
2373 continue
2374 if info_dict.get(key):
2375 live_status = key
2376 break
2377 if all(info_dict.get(key) is False for key in live_keys):
2378 live_status = 'not_live'
2379 if live_status:
2380 info_dict['live_status'] = live_status
2381 for key in live_keys:
2382 if info_dict.get(key) is None:
2383 info_dict[key] = (live_status == key)
2384
2385 # Auto generate title fields corresponding to the *_number fields when missing
2386 # in order to always have clean titles. This is very common for TV series.
2387 for field in ('chapter', 'season', 'episode'):
2388 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2389 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2390
dd82ffea
JMF
2391 def process_video_result(self, info_dict, download=True):
2392 assert info_dict.get('_type', 'video') == 'video'
9c906919 2393 self._num_videos += 1
dd82ffea 2394
bec1fad2 2395 if 'id' not in info_dict:
fc08bdd6 2396 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2397 elif not info_dict.get('id'):
2398 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2399
c9969434
S
2400 def report_force_conversion(field, field_not, conversion):
2401 self.report_warning(
2402 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2403 % (field, field_not, conversion))
2404
2405 def sanitize_string_field(info, string_field):
2406 field = info.get(string_field)
2407 if field is None or isinstance(field, compat_str):
2408 return
2409 report_force_conversion(string_field, 'a string', 'string')
2410 info[string_field] = compat_str(field)
2411
2412 def sanitize_numeric_fields(info):
2413 for numeric_field in self._NUMERIC_FIELDS:
2414 field = info.get(numeric_field)
2415 if field is None or isinstance(field, compat_numeric_types):
2416 continue
2417 report_force_conversion(numeric_field, 'numeric', 'int')
2418 info[numeric_field] = int_or_none(field)
2419
2420 sanitize_string_field(info_dict, 'id')
2421 sanitize_numeric_fields(info_dict)
4c3f8c3f 2422 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2423 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2424
dd82ffea
JMF
2425 if 'playlist' not in info_dict:
2426 # It isn't part of a playlist
2427 info_dict['playlist'] = None
2428 info_dict['playlist_index'] = None
2429
bc516a3f 2430 self._sanitize_thumbnails(info_dict)
d5519808 2431
536a55da 2432 thumbnail = info_dict.get('thumbnail')
bc516a3f 2433 thumbnails = info_dict.get('thumbnails')
536a55da
S
2434 if thumbnail:
2435 info_dict['thumbnail'] = sanitize_url(thumbnail)
2436 elif thumbnails:
d5519808
PH
2437 info_dict['thumbnail'] = thumbnails[-1]['url']
2438
ae30b840 2439 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2440 info_dict['display_id'] = info_dict['id']
2441
03f83004 2442 self._fill_common_fields(info_dict)
33d2fc2f 2443
05108a49
S
2444 for cc_kind in ('subtitles', 'automatic_captions'):
2445 cc = info_dict.get(cc_kind)
2446 if cc:
2447 for _, subtitle in cc.items():
2448 for subtitle_format in subtitle:
2449 if subtitle_format.get('url'):
2450 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2451 if subtitle_format.get('ext') is None:
2452 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2453
2454 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2455 subtitles = info_dict.get('subtitles')
4bba3716 2456
360e1ca5 2457 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2458 info_dict['id'], subtitles, automatic_captions)
a504ced0 2459
dd82ffea
JMF
2460 if info_dict.get('formats') is None:
2461 # There's only one format available
2462 formats = [info_dict]
2463 else:
2464 formats = info_dict['formats']
2465
e0493e90 2466 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2467 if not self.params.get('allow_unplayable_formats'):
2468 formats = [f for f in formats if not f.get('has_drm')]
c0b6e5c7 2469 if info_dict['__has_drm'] and all(
2470 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2471 self.report_warning(
2472 'This video is DRM protected and only images are available for download. '
2473 'Use --list-formats to see them')
88acdbc2 2474
319b6059 2475 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2476 if not get_from_start:
2477 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2478 if info_dict.get('is_live') and formats:
adbc4ec4 2479 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2480 if get_from_start and not formats:
a44ca5a4 2481 self.raise_no_formats(info_dict, msg=(
2482 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2483 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2484
db95dc13 2485 if not formats:
1151c407 2486 self.raise_no_formats(info_dict)
db95dc13 2487
73af5cc8
S
2488 def is_wellformed(f):
2489 url = f.get('url')
a5ac0c47 2490 if not url:
73af5cc8
S
2491 self.report_warning(
2492 '"url" field is missing or empty - skipping format, '
2493 'there is an error in extractor')
a5ac0c47
S
2494 return False
2495 if isinstance(url, bytes):
2496 sanitize_string_field(f, 'url')
2497 return True
73af5cc8
S
2498
2499 # Filter out malformed formats for better extraction robustness
2500 formats = list(filter(is_wellformed, formats))
2501
181c7053
S
2502 formats_dict = {}
2503
dd82ffea 2504 # We check that all the formats have the format and format_id fields
db95dc13 2505 for i, format in enumerate(formats):
c9969434
S
2506 sanitize_string_field(format, 'format_id')
2507 sanitize_numeric_fields(format)
dcf77cf1 2508 format['url'] = sanitize_url(format['url'])
e74e3b63 2509 if not format.get('format_id'):
8016c922 2510 format['format_id'] = compat_str(i)
e2effb08
S
2511 else:
2512 # Sanitize format_id from characters used in format selector expression
ec85ded8 2513 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2514 format_id = format['format_id']
2515 if format_id not in formats_dict:
2516 formats_dict[format_id] = []
2517 formats_dict[format_id].append(format)
2518
2519 # Make sure all formats have unique format_id
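# (e.g. two formats both reporting 'hls' become 'hls-0' and 'hls-1'; an id that
# equals a common extension such as 'mp4' is prefixed to 'fmp4' so it cannot
# clash with extension-based format selection)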
03b4de72 2520 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2521 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2522 ambigious_id = len(ambiguous_formats) > 1
2523 for i, format in enumerate(ambiguous_formats):
2524 if ambigious_id:
181c7053 2525 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2526 if format.get('ext') is None:
2527 format['ext'] = determine_ext(format['url']).lower()
2528 # Ensure there is no conflict between id and ext in format selection
2529 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2530 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2531 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2532
2533 for i, format in enumerate(formats):
8c51aa65 2534 if format.get('format') is None:
6febd1c1 2535 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2536 id=format['format_id'],
2537 res=self.format_resolution(format),
b868936c 2538 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2539 )
6f0be937 2540 if format.get('protocol') is None:
b5559424 2541 format['protocol'] = determine_protocol(format)
239df021 2542 if format.get('resolution') is None:
2543 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2544 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2545 format['dynamic_range'] = 'SDR'
f2fe69c7 2546 if (info_dict.get('duration') and format.get('tbr')
2547 and not format.get('filesize') and not format.get('filesize_approx')):
2548 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
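# tbr is in KBit/s, so duration (s) * tbr * 1024 / 8 approximates the size in bytes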
2549
e5660ee6
JMF
2550 # Add HTTP headers, so that external programs can use them from the
2551 # json output
2552 full_format_info = info_dict.copy()
2553 full_format_info.update(format)
2554 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2555 # Remove private housekeeping stuff
2556 if '__x_forwarded_for_ip' in info_dict:
2557 del info_dict['__x_forwarded_for_ip']
dd82ffea 2558
9f1a1c36 2559 if self.params.get('check_formats') is True:
282f5709 2560 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2561
88acdbc2 2562 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2563 # only set the 'formats' field if the original info_dict lists them
2564 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2565 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2566 # which can't be exported to json
b3d9ef88 2567 info_dict['formats'] = formats
4ec82a72 2568
2569 info_dict, _ = self.pre_process(info_dict)
2570
6db9c4d5 2571 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2572 return info_dict
2573
2574 self.post_extract(info_dict)
2575 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2576
093a1710 2577 # The pre-processors may have modified the formats
2578 formats = info_dict.get('formats', [info_dict])
2579
fa9f30b8 2580 list_only = self.params.get('simulate') is None and (
2581 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2582 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2583 if self.params.get('list_thumbnails'):
2584 self.list_thumbnails(info_dict)
b7b04c78 2585 if self.params.get('listsubtitles'):
2586 if 'automatic_captions' in info_dict:
2587 self.list_subtitles(
2588 info_dict['id'], automatic_captions, 'automatic captions')
2589 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2590 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2591 self.list_formats(info_dict)
169dbde9 2592 if list_only:
b7b04c78 2593 # Without this printing, -F --print-json will not work
169dbde9 2594 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2595 return
2596
187986a8 2597 format_selector = self.format_selector
2598 if format_selector is None:
0017d9ad 2599 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2600 self.write_debug('Default format spec: %s' % req_format)
187986a8 2601 format_selector = self.build_format_selector(req_format)
317f7ab6 2602
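# Apply the format selector; in interactive mode (-f -), keep prompting until the selector matches at least one format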
fa9f30b8 2603 while True:
2604 if interactive_format_selection:
2605 req_format = input(
2606 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2607 try:
2608 format_selector = self.build_format_selector(req_format)
2609 except SyntaxError as err:
2610 self.report_error(err, tb=False, is_error=False)
2611 continue
2612
85e801a9 2613 formats_to_download = list(format_selector({
fa9f30b8 2614 'formats': formats,
85e801a9 2615 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2616 'incomplete_formats': (
2617 # All formats are video-only or
2618 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2619 # all formats are audio-only
2620 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2621 }))
fa9f30b8 2622 if interactive_format_selection and not formats_to_download:
2623 self.report_error('Requested format is not available', tb=False, is_error=False)
2624 continue
2625 break
317f7ab6 2626
dd82ffea 2627 if not formats_to_download:
b7da73eb 2628 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2629 raise ExtractorError(
2630 'Requested format is not available. Use --list-formats for a list of available formats',
2631 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2632 self.report_warning('Requested format is not available')
2633 # Process what we can, even without any available formats.
2634 formats_to_download = [{}]
a13e6848 2635
b62fa6d7 2636 best_format = formats_to_download[-1]
2637 if download:
2638 if best_format:
2639 self.to_screen(
2640 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2641 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2642 max_downloads_reached = False
f46e2f9d 2643 for i, fmt in enumerate(formats_to_download):
09b49e1f 2644 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2645 new_info.update(fmt)
a13e6848 2646 try:
2647 self.process_info(new_info)
2648 except MaxDownloadsReached:
2649 max_downloads_reached = True
f46e2f9d 2650 # Remove copied info
2651 for key, val in tuple(new_info.items()):
2652 if info_dict.get(key) == val:
2653 new_info.pop(key)
a13e6848 2654 if max_downloads_reached:
2655 break
ebed8b37 2656
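# Record the video in the download archive only if at least one download asked for it and none opted out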
9e907ebd 2657 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
a13e6848 2658 assert write_archive.issubset({True, False, 'ignore'})
2659 if True in write_archive and False not in write_archive:
2660 self.record_download_archive(info_dict)
be72c624 2661
2662 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2663 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2664 if max_downloads_reached:
2665 raise MaxDownloadsReached()
ebed8b37 2666
49a57e70 2667 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2668 info_dict.update(best_format)
dd82ffea
JMF
2669 return info_dict
2670
98c70d6f 2671 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2672 """Select the requested subtitles and their format"""
d8a58ddc 2673 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2674 if normal_subtitles and self.params.get('writesubtitles'):
2675 available_subs.update(normal_subtitles)
d8a58ddc 2676 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2677 if automatic_captions and self.params.get('writeautomaticsub'):
2678 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2679 if lang not in available_subs:
2680 available_subs[lang] = cap_info
2681
4d171848
JMF
2682 if (not self.params.get('writesubtitles') and not
2683 self.params.get('writeautomaticsub') or not
2684 available_subs):
2685 return None
a504ced0 2686
d8a58ddc 2687 all_sub_langs = tuple(available_subs.keys())
a504ced0 2688 if self.params.get('allsubtitles', False):
c32b0aab 2689 requested_langs = all_sub_langs
2690 elif self.params.get('subtitleslangs', False):
77c4a9ef 2691 # A list is used so that the order of languages will be the same as
2692 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2693 requested_langs = []
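# Each entry is treated as a regex; a leading '-' discards matching languages and 'all' selects (or clears) everything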
2694 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2695 discard = lang_re[0] == '-'
c32b0aab 2696 if discard:
77c4a9ef 2697 lang_re = lang_re[1:]
3aa91540 2698 if lang_re == 'all':
2699 if discard:
2700 requested_langs = []
2701 else:
2702 requested_langs.extend(all_sub_langs)
2703 continue
77c4a9ef 2704 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2705 if discard:
2706 for lang in current_langs:
77c4a9ef 2707 while lang in requested_langs:
2708 requested_langs.remove(lang)
c32b0aab 2709 else:
77c4a9ef 2710 requested_langs.extend(current_langs)
2711 requested_langs = orderedSet(requested_langs)
d8a58ddc 2712 elif normal_sub_langs:
2713 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2714 else:
d8a58ddc 2715 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2716 if requested_langs:
2717 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2718
2719 formats_query = self.params.get('subtitlesformat', 'best')
2720 formats_preference = formats_query.split('/') if formats_query else []
2721 subs = {}
2722 for lang in requested_langs:
2723 formats = available_subs.get(lang)
2724 if formats is None:
2725 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2726 continue
a504ced0
JMF
2727 for ext in formats_preference:
2728 if ext == 'best':
2729 f = formats[-1]
2730 break
2731 matches = list(filter(lambda f: f['ext'] == ext, formats))
2732 if matches:
2733 f = matches[-1]
2734 break
2735 else:
2736 f = formats[-1]
2737 self.report_warning(
2738 'No subtitle format found matching "%s" for language %s, '
2739 'using %s' % (formats_query, lang, f['ext']))
2740 subs[lang] = f
2741 return subs
2742
bb66c247 2743 def _forceprint(self, key, info_dict):
2744 if info_dict is None:
2745 return
2746 info_copy = info_dict.copy()
2747 info_copy['formats_table'] = self.render_formats_table(info_dict)
2748 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2749 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2750 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2751
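# A bare field name expands to '%(field)s', 'field=' expands to 'field = %(field)r', and anything else is used as an output template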
2752 def format_tmpl(tmpl):
2753 mobj = re.match(r'\w+(=?)$', tmpl)
2754 if mobj and mobj.group(1):
2755 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2756 elif mobj:
2757 return f'%({tmpl})s'
2758 return tmpl
8130779d 2759
bb66c247 2760 for tmpl in self.params['forceprint'].get(key, []):
2761 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2762
2763 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2764 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2765 tmpl = format_tmpl(tmpl)
2766 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2767 if self._ensure_dir_exists(filename):
2768 with io.open(filename, 'a', encoding='utf-8') as f:
2769 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2770
d06daf23 2771 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2772 def print_mandatory(field, actual_field=None):
2773 if actual_field is None:
2774 actual_field = field
d06daf23 2775 if (self.params.get('force%s' % field, False)
53c18592 2776 and (not incomplete or info_dict.get(actual_field) is not None)):
2777 self.to_stdout(info_dict[actual_field])
d06daf23
S
2778
2779 def print_optional(field):
2780 if (self.params.get('force%s' % field, False)
2781 and info_dict.get(field) is not None):
2782 self.to_stdout(info_dict[field])
2783
53c18592 2784 info_dict = info_dict.copy()
2785 if filename is not None:
2786 info_dict['filename'] = filename
2787 if info_dict.get('requested_formats') is not None:
2788 # For RTMP URLs, also include the playpath
2789 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2790 elif info_dict.get('url'):
53c18592 2791 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2792
bb66c247 2793 if (self.params.get('forcejson')
2794 or self.params['forceprint'].get('video')
2795 or self.params['print_to_file'].get('video')):
2b8a2973 2796 self.post_extract(info_dict)
bb66c247 2797 self._forceprint('video', info_dict)
53c18592 2798
d06daf23
S
2799 print_mandatory('title')
2800 print_mandatory('id')
53c18592 2801 print_mandatory('url', 'urls')
d06daf23
S
2802 print_optional('thumbnail')
2803 print_optional('description')
53c18592 2804 print_optional('filename')
b868936c 2805 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2806 self.to_stdout(formatSeconds(info_dict['duration']))
2807 print_mandatory('format')
53c18592 2808
2b8a2973 2809 if self.params.get('forcejson'):
6e84b215 2810 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2811
e8e73840 2812 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2813 if not info.get('url'):
1151c407 2814 self.raise_no_formats(info, True)
e8e73840 2815
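# In test mode, use throw-away downloader options: quiet, no .part/.ytdl files, always overwrite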
2816 if test:
2817 verbose = self.params.get('verbose')
2818 params = {
2819 'test': True,
a169858f 2820 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2821 'verbose': verbose,
2822 'noprogress': not verbose,
2823 'nopart': True,
2824 'skip_unavailable_fragments': False,
2825 'keep_fragments': False,
2826 'overwrites': True,
2827 '_no_ytdl_file': True,
2828 }
2829 else:
2830 params = self.params
96fccc10 2831 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2832 if not test:
2833 for ph in self._progress_hooks:
2834 fd.add_progress_hook(ph)
42676437
M
2835 urls = '", "'.join(
2836 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2837 for f in info.get('requested_formats', []) or [info])
18e674b4 2838 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2839
adbc4ec4
THD
2840 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2841 # But it may contain objects that are not deep-copyable
2842 new_info = self._copy_infodict(info)
e8e73840 2843 if new_info.get('http_headers') is None:
2844 new_info['http_headers'] = self._calc_headers(new_info)
2845 return fd.download(name, new_info, subtitle)
2846
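# Return a pre-existing file that can be reused when overwrites are disabled; otherwise delete all existing candidates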
e04938ab 2847 def existing_file(self, filepaths, *, default_overwrite=True):
2848 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2849 if existing_files and not self.params.get('overwrites', default_overwrite):
2850 return existing_files[0]
2851
2852 for file in existing_files:
2853 self.report_file_delete(file)
2854 os.remove(file)
2855 return None
2856
8222d8de 2857 def process_info(self, info_dict):
09b49e1f 2858 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2859
2860 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2861 original_infodict = info_dict
fd288278 2862
4513a41a 2863 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2864 info_dict['format'] = info_dict['ext']
2865
09b49e1f 2866 # This is mostly just for backward compatibility of process_info
2867 # As a side-effect, this allows for format-specific filters
c77495e3 2868 if self._match_entry(info_dict) is not None:
9e907ebd 2869 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2870 return
2871
09b49e1f 2872 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2873 self.post_extract(info_dict)
0c14d66a 2874 self._num_downloads += 1
8222d8de 2875
dcf64d43 2876 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2877 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2878 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2879 files_to_move = {}
8222d8de
JMF
2880
2881 # Forced printings
4513a41a 2882 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2883
b7b04c78 2884 if self.params.get('simulate'):
9e907ebd 2885 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
8222d8de
JMF
2886 return
2887
de6000d9 2888 if full_filename is None:
8222d8de 2889 return
e92caff5 2890 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2891 return
e92caff5 2892 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2893 return
2894
80c03fa9 2895 if self._write_description('video', info_dict,
2896 self.prepare_filename(info_dict, 'description')) is None:
2897 return
2898
2899 sub_files = self._write_subtitles(info_dict, temp_filename)
2900 if sub_files is None:
2901 return
2902 files_to_move.update(dict(sub_files))
2903
2904 thumb_files = self._write_thumbnails(
2905 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2906 if thumb_files is None:
2907 return
2908 files_to_move.update(dict(thumb_files))
8222d8de 2909
80c03fa9 2910 infofn = self.prepare_filename(info_dict, 'infojson')
2911 _infojson_written = self._write_info_json('video', info_dict, infofn)
2912 if _infojson_written:
dac5df5a 2913 info_dict['infojson_filename'] = infofn
e75bb0d6 2914 # For backward compatibility, even though it was a private field
80c03fa9 2915 info_dict['__infojson_filename'] = infofn
2916 elif _infojson_written is None:
2917 return
2918
2919 # Note: Annotations are deprecated
2920 annofn = None
1fb07d10 2921 if self.params.get('writeannotations', False):
de6000d9 2922 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2923 if annofn:
e92caff5 2924 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2925 return
0c3d0f51 2926 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2927 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2928 elif not info_dict.get('annotations'):
2929 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2930 else:
2931 try:
6febd1c1 2932 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2933 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2934 annofile.write(info_dict['annotations'])
2935 except (KeyError, TypeError):
6febd1c1 2936 self.report_warning('There are no annotations to write.')
7b6fefc9 2937 except (OSError, IOError):
6febd1c1 2938 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2939 return
1fb07d10 2940
732044af 2941 # Write internet shortcut files
08438d2c 2942 def _write_link_file(link_type):
60f3e995 2943 url = try_get(info_dict['webpage_url'], iri_to_uri)
2944 if not url:
2945 self.report_warning(
2946 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2947 return True
08438d2c 2948 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2949 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2950 return False
10e3742e 2951 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2952 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2953 return True
2954 try:
2955 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2956 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2957 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2958 template_vars = {'url': url}
08438d2c 2959 if link_type == 'desktop':
2960 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2961 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2962 except (OSError, IOError):
2963 self.report_error(f'Cannot write internet shortcut {linkfn}')
2964 return False
732044af 2965 return True
2966
08438d2c 2967 write_links = {
2968 'url': self.params.get('writeurllink'),
2969 'webloc': self.params.get('writewebloclink'),
2970 'desktop': self.params.get('writedesktoplink'),
2971 }
2972 if self.params.get('writelink'):
2973 link_type = ('webloc' if sys.platform == 'darwin'
2974 else 'desktop' if sys.platform.startswith('linux')
2975 else 'url')
2976 write_links[link_type] = True
2977
2978 if any(should_write and not _write_link_file(link_type)
2979 for link_type, should_write in write_links.items()):
2980 return
732044af 2981
f46e2f9d 2982 def replace_info_dict(new_info):
2983 nonlocal info_dict
2984 if new_info == info_dict:
2985 return
2986 info_dict.clear()
2987 info_dict.update(new_info)
2988
56d868db 2989 try:
f46e2f9d 2990 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2991 replace_info_dict(new_info)
56d868db 2992 except PostProcessingError as err:
2993 self.report_error('Preprocessing: %s' % str(err))
2994 return
2995
a13e6848 2996 if self.params.get('skip_download'):
56d868db 2997 info_dict['filepath'] = temp_filename
2998 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2999 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3000 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3001 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3002 else:
3003 # Download
b868936c 3004 info_dict.setdefault('__postprocessors', [])
4340deca 3005 try:
0202b52a 3006
e04938ab 3007 def existing_video_file(*filepaths):
6b591b29 3008 ext = info_dict.get('ext')
e04938ab 3009 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3010 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3011 default_overwrite=False)
3012 if file:
3013 info_dict['ext'] = os.path.splitext(file)[1][1:]
3014 return file
0202b52a 3015
3016 success = True
4340deca 3017 if info_dict.get('requested_formats') is not None:
81cd954a
S
3018
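# Decide whether the requested formats can be merged into their native container; if not, fall back to mkv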
3019 def compatible_formats(formats):
d03cfdce 3020 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3021 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3022 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3023 if len(video_formats) > 2 or len(audio_formats) > 2:
3024 return False
3025
81cd954a 3026 # Check extension
d03cfdce 3027 exts = set(format.get('ext') for format in formats)
3028 COMPATIBLE_EXTS = (
3029 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3030 set(('webm',)),
3031 )
3032 for ext_sets in COMPATIBLE_EXTS:
3033 if ext_sets.issuperset(exts):
3034 return True
81cd954a
S
3035 # TODO: Check acodec/vcodec
3036 return False
3037
3038 requested_formats = info_dict['requested_formats']
0202b52a 3039 old_ext = info_dict['ext']
4e3b637d 3040 if self.params.get('merge_output_format') is None:
3041 if not compatible_formats(requested_formats):
3042 info_dict['ext'] = 'mkv'
3043 self.report_warning(
3044 'Requested formats are incompatible for merge and will be merged into mkv')
3045 if (info_dict['ext'] == 'webm'
3046 and info_dict.get('thumbnails')
3047 # check with type instead of pp_key, __name__, or isinstance
3048 # since we don't want any custom PPs to trigger this
3049 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3050 info_dict['ext'] = 'mkv'
3051 self.report_warning(
3052 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3053 new_ext = info_dict['ext']
0202b52a 3054
124bc071 3055 def correct_ext(filename, ext=new_ext):
96fccc10 3056 if filename == '-':
3057 return filename
0202b52a 3058 filename_real_ext = os.path.splitext(filename)[1][1:]
3059 filename_wo_ext = (
3060 os.path.splitext(filename)[0]
124bc071 3061 if filename_real_ext in (old_ext, new_ext)
0202b52a 3062 else filename)
124bc071 3063 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 3064
38c6902b 3065 # Ensure filename always has a correct extension for successful merge
0202b52a 3066 full_filename = correct_ext(full_filename)
3067 temp_filename = correct_ext(temp_filename)
e04938ab 3068 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3069 info_dict['__real_download'] = False
18e674b4 3070
adbc4ec4
THD
3071 downloaded = []
3072 merger = FFmpegMergerPP(self)
3073
3074 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
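# If a single downloader (e.g. ffmpeg) can handle all requested formats at once, download them together;
# otherwise download each format separately and merge the pieces afterwards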
dbf5416a 3075 if dl_filename is not None:
6c7274ec 3076 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3077 elif fd:
3078 for f in requested_formats if fd != FFmpegFD else []:
3079 f['filepath'] = fname = prepend_extension(
3080 correct_ext(temp_filename, info_dict['ext']),
3081 'f%s' % f['format_id'], info_dict['ext'])
3082 downloaded.append(fname)
dbf5416a 3083 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3084 success, real_download = self.dl(temp_filename, info_dict)
3085 info_dict['__real_download'] = real_download
18e674b4 3086 else:
18e674b4 3087 if self.params.get('allow_unplayable_formats'):
3088 self.report_warning(
3089 'You have requested merging of multiple formats '
3090 'while also allowing unplayable formats to be downloaded. '
3091 'The formats won\'t be merged to prevent data corruption.')
3092 elif not merger.available:
e8969bda 3093 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3094 if not self.params.get('ignoreerrors'):
3095 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3096 return
3097 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3098
96fccc10 3099 if temp_filename == '-':
adbc4ec4 3100 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3101 else 'but the formats are incompatible for simultaneous download' if merger.available
3102 else 'but ffmpeg is not installed')
3103 self.report_warning(
3104 f'You have requested downloading multiple formats to stdout {reason}. '
3105 'The formats will be streamed one after the other')
3106 fname = temp_filename
dbf5416a 3107 for f in requested_formats:
3108 new_info = dict(info_dict)
3109 del new_info['requested_formats']
3110 new_info.update(f)
96fccc10 3111 if temp_filename != '-':
124bc071 3112 fname = prepend_extension(
3113 correct_ext(temp_filename, new_info['ext']),
3114 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3115 if not self._ensure_dir_exists(fname):
3116 return
a21e0ab1 3117 f['filepath'] = fname
96fccc10 3118 downloaded.append(fname)
dbf5416a 3119 partial_success, real_download = self.dl(fname, new_info)
3120 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3121 success = success and partial_success
adbc4ec4
THD
3122
3123 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3124 info_dict['__postprocessors'].append(merger)
3125 info_dict['__files_to_merge'] = downloaded
3126 # Even if nothing was downloaded, the merge itself happens only now
3127 info_dict['__real_download'] = True
3128 else:
3129 for file in downloaded:
3130 files_to_move[file] = None
4340deca
P
3131 else:
3132 # Just a single file
e04938ab 3133 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3134 if dl_filename is None or dl_filename == temp_filename:
3135 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3136 # So we should try to resume the download
e8e73840 3137 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3138 info_dict['__real_download'] = real_download
6c7274ec 3139 else:
3140 self.report_file_already_downloaded(dl_filename)
0202b52a 3141
0202b52a 3142 dl_filename = dl_filename or temp_filename
c571435f 3143 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3144
3158150c 3145 except network_exceptions as err:
7960b056 3146 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
3147 return
3148 except (OSError, IOError) as err:
3149 raise UnavailableVideoError(err)
3150 except (ContentTooShortError, ) as err:
3151 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3152 return
8222d8de 3153
de6000d9 3154 if success and full_filename != '-':
f17f8651 3155
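# Queue ffmpeg-based fixups (aspect ratio, DASH m4a, MPEG-TS in MP4, timestamps, ...) according to the --fixup policy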
fd7cfb64 3156 def fixup():
3157 do_fixup = True
3158 fixup_policy = self.params.get('fixup')
3159 vid = info_dict['id']
3160
3161 if fixup_policy in ('ignore', 'never'):
3162 return
3163 elif fixup_policy == 'warn':
3164 do_fixup = False
f89b3e2d 3165 elif fixup_policy != 'force':
3166 assert fixup_policy in ('detect_or_warn', None)
3167 if not info_dict.get('__real_download'):
3168 do_fixup = False
fd7cfb64 3169
3170 def ffmpeg_fixup(cndn, msg, cls):
3171 if not cndn:
3172 return
3173 if not do_fixup:
3174 self.report_warning(f'{vid}: {msg}')
3175 return
3176 pp = cls(self)
3177 if pp.available:
3178 info_dict['__postprocessors'].append(pp)
3179 else:
3180 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3181
3182 stretched_ratio = info_dict.get('stretched_ratio')
3183 ffmpeg_fixup(
3184 stretched_ratio not in (1, None),
3185 f'Non-uniform pixel ratio {stretched_ratio}',
3186 FFmpegFixupStretchedPP)
3187
3188 ffmpeg_fixup(
3189 (info_dict.get('requested_formats') is None
3190 and info_dict.get('container') == 'm4a_dash'
3191 and info_dict.get('ext') == 'm4a'),
3192 'writing DASH m4a. Only some players support this container',
3193 FFmpegFixupM4aPP)
3194
993191c0 3195 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3196 downloader = downloader.__name__ if downloader else None
adbc4ec4
THD
3197
3198 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3199 ffmpeg_fixup(downloader == 'HlsFD',
3200 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3201 FFmpegFixupM3u8PP)
3202 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3203 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3204
e04b003e 3205 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3206 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3207
3208 fixup()
8222d8de 3209 try:
f46e2f9d 3210 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3211 except PostProcessingError as err:
3212 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3213 return
ab8e5e51
AM
3214 try:
3215 for ph in self._post_hooks:
23c1a667 3216 ph(info_dict['filepath'])
ab8e5e51
AM
3217 except Exception as err:
3218 self.report_error('post hooks: %s' % str(err))
3219 return
9e907ebd 3220 info_dict['__write_download_archive'] = True
2d30509f 3221
a13e6848 3222 if self.params.get('force_write_download_archive'):
9e907ebd 3223 info_dict['__write_download_archive'] = True
a13e6848 3224
3225 # Make sure the info_dict was modified in-place
f46e2f9d 3226 assert info_dict is original_infodict
a13e6848 3227
c3e6ffba 3228 max_downloads = self.params.get('max_downloads')
3229 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3230 raise MaxDownloadsReached()
8222d8de 3231
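# Wrap the download entry points so that per-URL errors are reported consistently and --break-per-url is honoured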
aa9369a2 3232 def __download_wrapper(self, func):
3233 @functools.wraps(func)
3234 def wrapper(*args, **kwargs):
3235 try:
3236 res = func(*args, **kwargs)
3237 except UnavailableVideoError as e:
3238 self.report_error(e)
b222c271 3239 except MaxDownloadsReached as e:
aa9369a2 3240 self.to_screen(f'[info] {e}')
3241 raise
b222c271 3242 except DownloadCancelled as e:
3243 self.to_screen(f'[info] {e}')
3244 if not self.params.get('break_per_url'):
3245 raise
aa9369a2 3246 else:
3247 if self.params.get('dump_single_json', False):
3248 self.post_extract(res)
3249 self.to_stdout(json.dumps(self.sanitize_info(res)))
3250 return wrapper
3251
8222d8de
JMF
3252 def download(self, url_list):
3253 """Download a given list of URLs."""
aa9369a2 3254 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3255 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3256 if (len(url_list) > 1
3257 and outtmpl != '-'
3258 and '%' not in outtmpl
3259 and self.params.get('max_downloads') != 1):
acd69589 3260 raise SameFileError(outtmpl)
8222d8de
JMF
3261
3262 for url in url_list:
aa9369a2 3263 self.__download_wrapper(self.extract_info)(
3264 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3265
3266 return self._download_retcode
3267
1dcc4c0c 3268 def download_with_info_file(self, info_filename):
31bd3925
JMF
3269 with contextlib.closing(fileinput.FileInput(
3270 [info_filename], mode='r',
3271 openhook=fileinput.hook_encoded('utf-8'))) as f:
3272 # FileInput doesn't have a read method, we can't call json.load
8012d892 3273 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3274 try:
aa9369a2 3275 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3276 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3277 if not isinstance(e, EntryNotInPlaylist):
3278 self.to_stderr('\r')
d4943898
JMF
3279 webpage_url = info.get('webpage_url')
3280 if webpage_url is not None:
aa9369a2 3281 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3282 return self.download([webpage_url])
3283 else:
3284 raise
3285 return self._download_retcode
1dcc4c0c 3286
cb202fd2 3287 @staticmethod
8012d892 3288 def sanitize_info(info_dict, remove_private_keys=False):
3289 ''' Sanitize the infodict for converting to json '''
3ad56b42 3290 if info_dict is None:
3291 return info_dict
6e84b215 3292 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3293 info_dict.setdefault('_type', 'video')
09b49e1f 3294
8012d892 3295 if remove_private_keys:
09b49e1f 3296 reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
f46e2f9d 3297 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3298 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3299 }
ae8f99e6 3300 else:
09b49e1f 3301 reject = lambda k, v: False
adbc4ec4
THD
3302
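# Recursively drop rejected keys and coerce anything that is not JSON-serializable to its repr()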
3303 def filter_fn(obj):
3304 if isinstance(obj, dict):
3305 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3306 elif isinstance(obj, (list, tuple, set, LazyList)):
3307 return list(map(filter_fn, obj))
3308 elif obj is None or isinstance(obj, (str, int, float, bool)):
3309 return obj
3310 else:
3311 return repr(obj)
3312
5226731e 3313 return filter_fn(info_dict)
cb202fd2 3314
8012d892 3315 @staticmethod
3316 def filter_requested_info(info_dict, actually_filter=True):
3317 ''' Alias of sanitize_info for backward compatibility '''
3318 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3319
ed5835b4 3320 @staticmethod
3321 def post_extract(info_dict):
3322 def actual_post_extract(info_dict):
3323 if info_dict.get('_type') in ('playlist', 'multi_video'):
3324 for video_dict in info_dict.get('entries', {}):
3325 actual_post_extract(video_dict or {})
3326 return
3327
09b49e1f 3328 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3329 info_dict.update(post_extractor())
ed5835b4 3330
3331 actual_post_extract(info_dict or {})
3332
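# Run a single postprocessor; files it marks for deletion are removed unless --keep-video is given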
dcf64d43 3333 def run_pp(self, pp, infodict):
5bfa4862 3334 files_to_delete = []
dcf64d43 3335 if '__files_to_move' not in infodict:
3336 infodict['__files_to_move'] = {}
b1940459 3337 try:
3338 files_to_delete, infodict = pp.run(infodict)
3339 except PostProcessingError as e:
3340 # Must be True and not 'only_download'
3341 if self.params.get('ignoreerrors') is True:
3342 self.report_error(e)
3343 return infodict
3344 raise
3345
5bfa4862 3346 if not files_to_delete:
dcf64d43 3347 return infodict
5bfa4862 3348 if self.params.get('keepvideo', False):
3349 for f in files_to_delete:
dcf64d43 3350 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3351 else:
3352 for old_filename in set(files_to_delete):
3353 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3354 try:
3355 os.remove(encodeFilename(old_filename))
3356 except (IOError, OSError):
3357 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3358 if old_filename in infodict['__files_to_move']:
3359 del infodict['__files_to_move'][old_filename]
3360 return infodict
5bfa4862 3361
ed5835b4 3362 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3363 self._forceprint(key, info)
ed5835b4 3364 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3365 info = self.run_pp(pp, info)
ed5835b4 3366 return info
277d6ff5 3367
56d868db 3368 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3369 info = dict(ie_info)
56d868db 3370 info['__files_to_move'] = files_to_move or {}
ed5835b4 3371 info = self.run_all_pps(key, info)
56d868db 3372 return info, info.pop('__files_to_move', None)
5bfa4862 3373
f46e2f9d 3374 def post_process(self, filename, info, files_to_move=None):
8222d8de 3375 """Run all the postprocessors on the given file."""
8222d8de 3376 info['filepath'] = filename
dcf64d43 3377 info['__files_to_move'] = files_to_move or {}
ed5835b4 3378 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3379 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3380 del info['__files_to_move']
ed5835b4 3381 return self.run_all_pps('after_move', info)
c1c9a79c 3382
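# Archive entries have the form '<extractor key, lowercased> <video id>'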
5db07df6 3383 def _make_archive_id(self, info_dict):
e9fef7ee
S
3384 video_id = info_dict.get('id')
3385 if not video_id:
3386 return
5db07df6
PH
3387 # Future-proof against any change in case
3388 # and for backwards compatibility with prior versions
e9fef7ee 3389 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3390 if extractor is None:
1211bb6d
S
3391 url = str_or_none(info_dict.get('url'))
3392 if not url:
3393 return
e9fef7ee 3394 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3395 for ie_key, ie in self._ies.items():
1211bb6d 3396 if ie.suitable(url):
8b7491c8 3397 extractor = ie_key
e9fef7ee
S
3398 break
3399 else:
3400 return
d0757229 3401 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3402
3403 def in_download_archive(self, info_dict):
3404 fn = self.params.get('download_archive')
3405 if fn is None:
3406 return False
3407
3408 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3409 if not vid_id:
7012b23c 3410 return False # Incomplete video information
5db07df6 3411
a45e8619 3412 return vid_id in self.archive
c1c9a79c
PH
3413
3414 def record_download_archive(self, info_dict):
3415 fn = self.params.get('download_archive')
3416 if fn is None:
3417 return
5db07df6
PH
3418 vid_id = self._make_archive_id(info_dict)
3419 assert vid_id
a13e6848 3420 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3421 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3422 archive_file.write(vid_id + '\n')
a45e8619 3423 self.archive.add(vid_id)
dd82ffea 3424
8c51aa65 3425 @staticmethod
8abeeb94 3426 def format_resolution(format, default='unknown'):
9359f3d4 3427 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3428 return 'audio only'
f49d89ee
PH
3429 if format.get('resolution') is not None:
3430 return format['resolution']
35615307 3431 if format.get('width') and format.get('height'):
ff51ed58 3432 return '%dx%d' % (format['width'], format['height'])
35615307 3433 elif format.get('height'):
ff51ed58 3434 return '%sp' % format['height']
35615307 3435 elif format.get('width'):
ff51ed58 3436 return '%dx?' % format['width']
3437 return default
8c51aa65 3438
8130779d 3439 def _list_format_headers(self, *headers):
3440 if self.params.get('listformats_table', True) is not False:
3441 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3442 return headers
3443
c57f7757
PH
3444 def _format_note(self, fdict):
3445 res = ''
3446 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3447 res += '(unsupported)'
32f90364
PH
3448 if fdict.get('language'):
3449 if res:
3450 res += ' '
f304da8a 3451 res += '[%s]' % fdict['language']
c57f7757 3452 if fdict.get('format_note') is not None:
f304da8a 3453 if res:
3454 res += ' '
3455 res += fdict['format_note']
c57f7757 3456 if fdict.get('tbr') is not None:
f304da8a 3457 if res:
3458 res += ', '
3459 res += '%4dk' % fdict['tbr']
c57f7757
PH
3460 if fdict.get('container') is not None:
3461 if res:
3462 res += ', '
3463 res += '%s container' % fdict['container']
3089bc74
S
3464 if (fdict.get('vcodec') is not None
3465 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3466 if res:
3467 res += ', '
3468 res += fdict['vcodec']
91c7271a 3469 if fdict.get('vbr') is not None:
c57f7757
PH
3470 res += '@'
3471 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3472 res += 'video@'
3473 if fdict.get('vbr') is not None:
3474 res += '%4dk' % fdict['vbr']
fbb21cf5 3475 if fdict.get('fps') is not None:
5d583bdf
S
3476 if res:
3477 res += ', '
3478 res += '%sfps' % fdict['fps']
c57f7757
PH
3479 if fdict.get('acodec') is not None:
3480 if res:
3481 res += ', '
3482 if fdict['acodec'] == 'none':
3483 res += 'video only'
3484 else:
3485 res += '%-5s' % fdict['acodec']
3486 elif fdict.get('abr') is not None:
3487 if res:
3488 res += ', '
3489 res += 'audio'
3490 if fdict.get('abr') is not None:
3491 res += '@%3dk' % fdict['abr']
3492 if fdict.get('asr') is not None:
3493 res += ' (%5dHz)' % fdict['asr']
3494 if fdict.get('filesize') is not None:
3495 if res:
3496 res += ', '
3497 res += format_bytes(fdict['filesize'])
9732d77e
PH
3498 elif fdict.get('filesize_approx') is not None:
3499 if res:
3500 res += ', '
3501 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3502 return res
91c7271a 3503
8130779d 3504 def render_formats_table(self, info_dict):
b69fd25c 3505 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3506 return None
b69fd25c 3507
94badb25 3508 formats = info_dict.get('formats', [info_dict])
8130779d 3509 if self.params.get('listformats_table', True) is False:
76d321f6 3510 table = [
3511 [
3512 format_field(f, 'format_id'),
3513 format_field(f, 'ext'),
3514 self.format_resolution(f),
8130779d 3515 self._format_note(f)
3516 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3517 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3518
3519 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3520 table = [
3521 [
3522 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3523 format_field(f, 'ext'),
3524 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3525 format_field(f, 'fps', '\t%d'),
3526 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3527 delim,
3528 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3529 format_field(f, 'tbr', '\t%dk'),
3530 shorten_protocol_name(f.get('protocol', '')),
3531 delim,
3532 format_field(f, 'vcodec', default='unknown').replace(
3533 'none', 'images' if f.get('acodec') == 'none'
3534 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3535 format_field(f, 'vbr', '\t%dk'),
3536 format_field(f, 'acodec', default='unknown').replace(
3537 'none', '' if f.get('vcodec') == 'none'
3538 else self._format_screen('video only', self.Styles.SUPPRESS)),
3539 format_field(f, 'abr', '\t%dk'),
3540 format_field(f, 'asr', '\t%dHz'),
3541 join_nonempty(
3542 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3543 format_field(f, 'language', '[%s]'),
3544 join_nonempty(format_field(f, 'format_note'),
3545 format_field(f, 'container', ignore=(None, f.get('ext'))),
3546 delim=', '),
3547 delim=' '),
3548 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3549 header_line = self._list_format_headers(
3550 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3551 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3552
3553 return render_table(
3554 header_line, table, hide_empty=True,
3555 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3556
3557 def render_thumbnails_table(self, info_dict):
88f23a18 3558 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3559 if not thumbnails:
8130779d 3560 return None
3561 return render_table(
ec11a9f4 3562 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3563 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3564
8130779d 3565 def render_subtitles_table(self, video_id, subtitles):
2412044c 3566 def _row(lang, formats):
49c258e1 3567 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3568 if len(set(names)) == 1:
7aee40c1 3569 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3570 return [lang, ', '.join(names), ', '.join(exts)]
3571
8130779d 3572 if not subtitles:
3573 return None
3574 return render_table(
ec11a9f4 3575 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3576 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3577 hide_empty=True)
3578
3579 def __list_table(self, video_id, name, func, *args):
3580 table = func(*args)
3581 if not table:
3582 self.to_screen(f'{video_id} has no {name}')
3583 return
3584 self.to_screen(f'[info] Available {name} for {video_id}:')
3585 self.to_stdout(table)
3586
3587 def list_formats(self, info_dict):
3588 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3589
3590 def list_thumbnails(self, info_dict):
3591 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3592
3593 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3594 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3595
dca08720
PH
3596 def urlopen(self, req):
3597 """ Start an HTTP download """
82d8a8b6 3598 if isinstance(req, compat_basestring):
67dda517 3599 req = sanitized_Request(req)
19a41fc6 3600 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3601
3602 def print_debug_header(self):
3603 if not self.params.get('verbose'):
3604 return
49a57e70 3605
3606 def get_encoding(stream):
2a938746 3607 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3608 if not supports_terminal_sequences(stream):
e3c7d495 3609 from .compat import WINDOWS_VT_MODE
3610 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3611 return ret
3612
3613 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3614 locale.getpreferredencoding(),
3615 sys.getfilesystemencoding(),
cf4f42cb 3616 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
49a57e70 3617 self.get_encoding())
883d4b1e 3618
3619 logger = self.params.get('logger')
3620 if logger:
3621 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3622 write_debug(encoding_str)
3623 else:
96565c7e 3624 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3625 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3626
4c88ff87 3627 source = detect_variant()
36eaf303 3628 write_debug(join_nonempty(
3629 'yt-dlp version', __version__,
3630 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3631 '' if source == 'unknown' else f'({source})',
3632 delim=' '))
6e21fdd2 3633 if not _LAZY_LOADER:
3634 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3635 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3636 else:
49a57e70 3637 write_debug('Lazy loading extractors is disabled')
3ae5e797 3638 if plugin_extractors or plugin_postprocessors:
49a57e70 3639 write_debug('Plugins: %s' % [
3ae5e797 3640 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3641 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3642 if self.params.get('compat_opts'):
49a57e70 3643 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3644
3645 if source == 'source':
dca08720 3646 try:
36eaf303 3647 sp = Popen(
3648 ['git', 'rev-parse', '--short', 'HEAD'],
3649 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3650 cwd=os.path.dirname(os.path.abspath(__file__)))
3651 out, err = sp.communicate_or_kill()
3652 out = out.decode().strip()
3653 if re.match('[0-9a-f]+', out):
3654 write_debug('Git HEAD: %s' % out)
70a1165b 3655 except Exception:
36eaf303 3656 try:
3657 sys.exc_clear()
3658 except Exception:
3659 pass
b300cda4
S
3660
3661 def python_implementation():
3662 impl_name = platform.python_implementation()
3663 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3664 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3665 return impl_name
3666
49a57e70 3667 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3668 platform.python_version(),
3669 python_implementation(),
3670 platform.architecture()[0],
b300cda4 3671 platform_name()))
d28b5171 3672
8913ef74 3673 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3674 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3675 if ffmpeg_features:
a4211baf 3676 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
8913ef74 3677
4c83c967 3678 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3679 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3680 exe_str = ', '.join(
2831b468 3681 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3682 ) or 'none'
49a57e70 3683 write_debug('exe versions: %s' % exe_str)
dca08720 3684
2831b468 3685 from .downloader.websocket import has_websockets
3686 from .postprocessor.embedthumbnail import has_mutagen
f59f5ef8 3687 from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
2831b468 3688
c586f9e8 3689 lib_str = join_nonempty(
4390d5ec 3690 compat_brotli and compat_brotli.__name__,
d5820461 3691 has_certifi and 'certifi',
edf65256 3692 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
f59f5ef8 3693 SECRETSTORAGE_AVAILABLE and 'secretstorage',
2831b468 3694 has_mutagen and 'mutagen',
3695 SQLITE_AVAILABLE and 'sqlite',
c586f9e8 3696 has_websockets and 'websockets',
3697 delim=', ') or 'none'
49a57e70 3698 write_debug('Optional libraries: %s' % lib_str)
2831b468 3699
97ec5bc5 3700 self._setup_opener()
dca08720
PH
3701 proxy_map = {}
3702 for handler in self._opener.handlers:
3703 if hasattr(handler, 'proxies'):
3704 proxy_map.update(handler.proxies)
49a57e70 3705 write_debug(f'Proxy map: {proxy_map}')
dca08720 3706
49a57e70 3707 # Not implemented
3708 if False and self.params.get('call_home'):
58b1f00d 3709 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3710 write_debug('Public IP address: %s' % ipaddr)
58b1f00d
PH
3711 latest_version = self.urlopen(
3712 'https://yt-dl.org/latest/version').read().decode('utf-8')
3713 if version_tuple(latest_version) > version_tuple(__version__):
3714 self.report_warning(
3715 'You are using an outdated version (newest version: %s)! '
3716 'See https://yt-dl.org/update if you need help updating.' %
3717 latest_version)
3718
e344693b 3719 def _setup_opener(self):
97ec5bc5 3720 if hasattr(self, '_opener'):
3721 return
6ad14cab 3722 timeout_val = self.params.get('socket_timeout')
17bddf3e 3723 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3724
982ee69a 3725 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3726 opts_cookiefile = self.params.get('cookiefile')
3727 opts_proxy = self.params.get('proxy')
3728
982ee69a 3729 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3730
6a3f4c3f 3731 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3732 if opts_proxy is not None:
3733 if opts_proxy == '':
3734 proxies = {}
3735 else:
3736 proxies = {'http': opts_proxy, 'https': opts_proxy}
3737 else:
3738 proxies = compat_urllib_request.getproxies()
067aa17e 3739 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3740 if 'http' in proxies and 'https' not in proxies:
3741 proxies['https'] = proxies['http']
91410c9b 3742 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3743
3744 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3745 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3746 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3747 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3748 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3749
3750 # When passing our own FileHandler instance, build_opener won't add the
3751 # default FileHandler and allows us to disable the file protocol, which
3752 # can be used for malicious purposes (see
067aa17e 3753 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3754 file_handler = compat_urllib_request.FileHandler()
3755
3756 def file_open(*args, **kwargs):
7a5c1cfe 3757 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3758 file_handler.file_open = file_open
3759
3760 opener = compat_urllib_request.build_opener(
fca6dba8 3761 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3762
dca08720
PH
3763 # Delete the default user-agent header, which would otherwise apply in
3764 # cases where our custom HTTP handler doesn't come into play
067aa17e 3765 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3766 opener.addheaders = []
3767 self._opener = opener
62fec3b2
PH
3768
3769 def encode(self, s):
3770 if isinstance(s, bytes):
3771 return s # Already encoded
3772
3773 try:
3774 return s.encode(self.get_encoding())
3775 except UnicodeEncodeError as err:
3776 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3777 raise
3778
3779 def get_encoding(self):
3780 encoding = self.params.get('encoding')
3781 if encoding is None:
3782 encoding = preferredencoding()
3783 return encoding
ec82d85a 3784
e08a85d8 3785 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3786 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
e08a85d8 3787 if overwrite is None:
3788 overwrite = self.params.get('overwrites', True)
80c03fa9 3789 if not self.params.get('writeinfojson'):
3790 return False
3791 elif not infofn:
3792 self.write_debug(f'Skipping writing {label} infojson')
3793 return False
3794 elif not self._ensure_dir_exists(infofn):
3795 return None
e08a85d8 3796 elif not overwrite and os.path.exists(infofn):
80c03fa9 3797 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3798 return 'exists'
3799
3800 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3801 try:
3802 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3803 return True
3804 except (OSError, IOError):
3805 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3806 return None
80c03fa9 3807
3808 def _write_description(self, label, ie_result, descfn):
3809 ''' Write description and return True = written, False = skip, None = error '''
3810 if not self.params.get('writedescription'):
3811 return False
3812 elif not descfn:
3813 self.write_debug(f'Skipping writing {label} description')
3814 return False
3815 elif not self._ensure_dir_exists(descfn):
3816 return None
3817 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3818 self.to_screen(f'[info] {label.title()} description is already present')
3819 elif ie_result.get('description') is None:
3820 self.report_warning(f'There\'s no {label} description to write')
3821 return False
3822 else:
3823 try:
3824 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3825 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3826 descfile.write(ie_result['description'])
3827 except (OSError, IOError):
3828 self.report_error(f'Cannot write {label} description file {descfn}')
3829 return None
3830 return True
3831
3832 def _write_subtitles(self, info_dict, filename):
3833 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3834 ret = []
3835 subtitles = info_dict.get('requested_subtitles')
3836 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3837 # Subtitle download errors are already reported by the relevant IE,
3838 # so this will silently continue when used with an IE that doesn't support them
3839 return ret
3840
3841 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3842 if not sub_filename_base:
3843 self.to_screen('[info] Skipping writing video subtitles')
3844 return ret
3845 for sub_lang, sub_info in subtitles.items():
3846 sub_format = sub_info['ext']
3847 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3848 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3849 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3850 if existing_sub:
80c03fa9 3851 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3852 sub_info['filepath'] = existing_sub
3853 ret.append((existing_sub, sub_filename_final))
80c03fa9 3854 continue
3855
3856 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3857 if sub_info.get('data') is not None:
3858 try:
3859 # Use newline='' to prevent conversion of newline characters
3860 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3861 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3862 subfile.write(sub_info['data'])
3863 sub_info['filepath'] = sub_filename
3864 ret.append((sub_filename, sub_filename_final))
3865 continue
3866 except (OSError, IOError):
3867 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3868 return None
3869
3870 try:
3871 sub_copy = sub_info.copy()
3872 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3873 self.dl(sub_filename, sub_copy, subtitle=True)
3874 sub_info['filepath'] = sub_filename
3875 ret.append((sub_filename, sub_filename_final))
6020e05d 3876 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3877 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3878 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3879 if not self.params.get('ignoreerrors'):
3880 self.report_error(msg)
3881 raise DownloadError(msg)
3882 self.report_warning(msg)
519804a9 3883 return ret
80c03fa9 3884
3885 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3886 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3887 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3888 thumbnails, ret = [], []
6c4fd172 3889 if write_all or self.params.get('writethumbnail', False):
0202b52a 3890 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3891 multiple = write_all and len(thumbnails) > 1
ec82d85a 3892
80c03fa9 3893 if thumb_filename_base is None:
3894 thumb_filename_base = filename
3895 if thumbnails and not thumb_filename_base:
3896 self.write_debug(f'Skipping writing {label} thumbnail')
3897 return ret
3898
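# Iterate thumbnails in reverse order, dropping any that fail to download; unless write_all, stop after the first success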
dd0228ce 3899 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3900 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3901 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3902 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3903 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3904
e04938ab 3905 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3906 if existing_thumb:
aa9369a2 3907 self.to_screen('[info] %s is already present' % (
3908 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3909 t['filepath'] = existing_thumb
3910 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3911 else:
80c03fa9 3912 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3913 try:
297e9952 3914 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3915 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3916 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3917 shutil.copyfileobj(uf, thumbf)
80c03fa9 3918 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3919 t['filepath'] = thumb_filename
3158150c 3920 except network_exceptions as err:
dd0228ce 3921 thumbnails.pop(idx)
80c03fa9 3922 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3923 if ret and not write_all:
3924 break
0202b52a 3925 return ret