#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from enum import Enum
from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    LazyList,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    number_of_digits,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    Popen,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    RejectedVideoReached,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    ThrottledDownload,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .minicurses import format_text
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    EmbedThumbnailPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       How to sort the video formats. see "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. see "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats      Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                              yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                               pre_process|before_dl|post_process|after_move.
                               Assumed to be 'post_process' if not given
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                       Refer __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
    noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

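    # Example (illustrative sketch): the options above are passed as a single
    # params dict. A minimal embedding could look like the commented code
    # below; the option values and the URL are placeholders only.
    #
    #   from yt_dlp import YoutubeDL
    #
    #   ydl_opts = {
    #       'format': 'bestvideo+bestaudio/best',
    #       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
    #   }
    #   with YoutubeDL(ydl_opts) as ydl:
    #       ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
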
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self._allow_colors = {
            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    class Styles(Enum):
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'

    def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
        assert out in ('screen', 'err')
        if test_encoding:
            original_text = text
            handle = self._screen_file if out == 'screen' else self._err_file
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        if isinstance(f, self.Styles):
            f = f._value_
        return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback

    def _format_screen(self, *args, **kwargs):
        return self.__format_text('screen', *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self.__format_text('err', *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)

    def write_debug(self, message, only_once=False):
        '''Log debug message or Print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

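    # Example (illustrative sketch): validate_outtmpl() returns None for a
    # template that substitutes cleanly, and the raised ValueError otherwise,
    # so a template can be rejected before any extraction starts.
    #
    #   assert YoutubeDL.validate_outtmpl('%(title)s.%(ext)s') is None
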
    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

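    # Example (illustrative sketch): evaluating an output template against a
    # hand-written info_dict; the field values below are placeholders, not the
    # result of a real extraction.
    #
    #   ydl = YoutubeDL({})
    #   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
    #                        {'title': 'test', 'id': 'abc', 'ext': 'mp4'})
    #   # -> 'test [abc].mp4'
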
de6000d9 1152 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1153 try:
586a91b6 1154 sanitize = lambda k, v: sanitize_filename(
45598aab 1155 compat_str(v),
1bb5c511 1156 restricted=self.params.get('restrictfilenames'),
40df485f 1157 is_id=(k == 'id' or k.endswith('_id')))
b836dc94 1158 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1159 filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
15da37c7 1160
143db31d 1161 force_ext = OUTTMPL_TYPES.get(tmpl_type)
80c03fa9 1162 if filename and force_ext is not None:
752cda38 1163 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1164
bdc3fd2f
U
1165 # https://github.com/blackjack4494/youtube-dlc/issues/85
1166 trim_file_name = self.params.get('trim_file_name', False)
1167 if trim_file_name:
1168 fn_groups = filename.rsplit('.')
1169 ext = fn_groups[-1]
1170 sub_ext = ''
1171 if len(fn_groups) > 2:
1172 sub_ext = fn_groups[-2]
1173 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1174
0202b52a 1175 return filename
8222d8de 1176 except ValueError as err:
6febd1c1 1177 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1178 return None
1179
de6000d9 1180 def prepare_filename(self, info_dict, dir_type='', warn=False):
1181 """Generate the output filename."""
21cd8fae 1182
de6000d9 1183 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1184 if not filename and dir_type not in ('', 'temp'):
1185 return ''
de6000d9 1186
c84aeac6 1187 if warn:
21cd8fae 1188 if not self.params.get('paths'):
de6000d9 1189 pass
1190 elif filename == '-':
c84aeac6 1191 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
de6000d9 1192 elif os.path.isabs(filename):
c84aeac6 1193 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1194 if filename == '-' or not filename:
1195 return filename
1196
21cd8fae 1197 return self.get_output_path(dir_type, filename)
0202b52a 1198
120fe513 1199 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1200 """ Returns None if the file should be downloaded """
8222d8de 1201
c77495e3 1202 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1203
8b0d7497 1204 def check_filter():
8b0d7497 1205 if 'title' in info_dict:
1206 # This can happen when we're just evaluating the playlist
1207 title = info_dict['title']
1208 matchtitle = self.params.get('matchtitle', False)
1209 if matchtitle:
1210 if not re.search(matchtitle, title, re.IGNORECASE):
1211 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1212 rejecttitle = self.params.get('rejecttitle', False)
1213 if rejecttitle:
1214 if re.search(rejecttitle, title, re.IGNORECASE):
1215 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1216 date = info_dict.get('upload_date')
1217 if date is not None:
1218 dateRange = self.params.get('daterange', DateRange())
1219 if date not in dateRange:
1220 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1221 view_count = info_dict.get('view_count')
1222 if view_count is not None:
1223 min_views = self.params.get('min_views')
1224 if min_views is not None and view_count < min_views:
1225 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1226 max_views = self.params.get('max_views')
1227 if max_views is not None and view_count > max_views:
1228 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1229 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1230 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1231
8f18aca8 1232 match_filter = self.params.get('match_filter')
1233 if match_filter is not None:
1234 try:
1235 ret = match_filter(info_dict, incomplete=incomplete)
1236 except TypeError:
1237 # For backward compatibility
1238 ret = None if incomplete else match_filter(info_dict)
1239 if ret is not None:
1240 return ret
8b0d7497 1241 return None
1242
c77495e3 1243 if self.in_download_archive(info_dict):
1244 reason = '%s has already been recorded in the archive' % video_title
1245 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1246 else:
1247 reason = check_filter()
1248 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1249 if reason is not None:
120fe513 1250 if not silent:
1251 self.to_screen('[download] ' + reason)
c77495e3 1252 if self.params.get(break_opt, False):
1253 raise break_err()
8b0d7497 1254 return reason
fe7e0c98 1255
b6c45014
JMF
1256 @staticmethod
1257 def add_extra_info(info_dict, extra_info):
1258 '''Set the keys from extra_info in info dict if they are missing'''
1259 for key, value in extra_info.items():
1260 info_dict.setdefault(key, value)
1261
409e1828 1262 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1263 process=True, force_generic_extractor=False):
41d1cca3 1264 """
1265 Return a dict with the extracted information for the URL.
1266
1267 Arguments:
1268 url -- URL to extract
1269
1270 Keyword arguments:
1271 download -- whether to download videos during extraction
1272 ie_key -- extractor key hint
1273 extra_info -- dictionary containing the extra values to add to each result
1274 process -- whether to resolve all unresolved references (URLs, playlist items),
1275 must be True for download to work.
1276 force_generic_extractor -- force using the generic extractor
1277 """
fe7e0c98 1278
409e1828 1279 if extra_info is None:
1280 extra_info = {}
1281
61aa5ba3 1282 if not ie_key and force_generic_extractor:
d22dec74
S
1283 ie_key = 'Generic'
1284
8222d8de 1285 if ie_key:
8b7491c8 1286 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1287 else:
1288 ies = self._ies
1289
8b7491c8 1290 for ie_key, ie in ies.items():
8222d8de
JMF
1291 if not ie.suitable(url):
1292 continue
1293
1294 if not ie.working():
6febd1c1
PH
1295 self.report_warning('The program functionality for this site has been marked as broken, '
1296 'and will probably not work.')
8222d8de 1297
1151c407 1298 temp_id = ie.get_temp_id(url)
a0566bbf 1299 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1300 self.to_screen("[%s] %s: has already been recorded in archive" % (
1301 ie_key, temp_id))
1302 break
8b7491c8 1303 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1304 else:
1305 self.report_error('no suitable InfoExtractor for URL %s' % url)
1306
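# A minimal embedding sketch for extract_info() above (the URL is a
# placeholder and the options are illustrative only):
#
#     import yt_dlp
#     with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
#         info = ydl.extract_info('https://example.com/some-video', download=False)
#         print(info.get('title'))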
8e5fecc8 1307 def __handle_extraction_exceptions(func):
b5ae35ee 1308 @functools.wraps(func)
a0566bbf 1309 def wrapper(self, *args, **kwargs):
1310 try:
1311 return func(self, *args, **kwargs)
773f291d
S
1312 except GeoRestrictedError as e:
1313 msg = e.msg
1314 if e.countries:
1315 msg += '\nThis video is available in %s.' % ', '.join(
1316 map(ISO3166Utils.short2full, e.countries))
1317 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1318 self.report_error(msg)
fb043a6e 1319 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1320 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1321 except ThrottledDownload:
1322 self.to_stderr('\r')
1323 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1324 return wrapper(self, *args, **kwargs)
8e5fecc8 1325 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
d3e5bbf4 1326 raise
8222d8de 1327 except Exception as e:
b1940459 1328 if self.params.get('ignoreerrors'):
9b9c5355 1329 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1330 else:
1331 raise
a0566bbf 1332 return wrapper
1333
1334 @__handle_extraction_exceptions
58f197b7 1335 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1336 ie_result = ie.extract(url)
1337 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1338 return
1339 if isinstance(ie_result, list):
1340 # Backwards compatibility: old IE result format
1341 ie_result = {
1342 '_type': 'compat_list',
1343 'entries': ie_result,
1344 }
e37d0efb 1345 if extra_info.get('original_url'):
1346 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1347 self.add_default_extra_info(ie_result, ie, url)
1348 if process:
1349 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1350 else:
a0566bbf 1351 return ie_result
fe7e0c98 1352
ea38e55f 1353 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1354 if url is not None:
1355 self.add_extra_info(ie_result, {
1356 'webpage_url': url,
1357 'original_url': url,
1358 'webpage_url_basename': url_basename(url),
1359 })
1360 if ie is not None:
1361 self.add_extra_info(ie_result, {
1362 'extractor': ie.IE_NAME,
1363 'extractor_key': ie.ie_key(),
1364 })
ea38e55f 1365
58adec46 1366 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1367 """
1368 Take the result of the ie (may be modified) and resolve all unresolved
1369 references (URLs, playlist items).
1370
1371 It will also download the videos if 'download'.
1372 Returns the resolved ie_result.
1373 """
58adec46 1374 if extra_info is None:
1375 extra_info = {}
e8ee972c
PH
1376 result_type = ie_result.get('_type', 'video')
1377
057a5206 1378 if result_type in ('url', 'url_transparent'):
134c6ea8 1379 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1380 if ie_result.get('original_url'):
1381 extra_info.setdefault('original_url', ie_result['original_url'])
1382
057a5206 1383 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1384 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1385 or extract_flat is True):
ecb54191 1386 info_copy = ie_result.copy()
6033d980 1387 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1388 if ie and not ie_result.get('id'):
4614bc22 1389 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1390 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1391 self.add_extra_info(info_copy, extra_info)
ecb54191 1392 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1393 if self.params.get('force_write_download_archive', False):
1394 self.record_download_archive(info_copy)
e8ee972c
PH
1395 return ie_result
1396
8222d8de 1397 if result_type == 'video':
b6c45014 1398 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1399 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1400 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1401 if additional_urls:
e9f4ccd1 1402 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1403 if isinstance(additional_urls, compat_str):
1404 additional_urls = [additional_urls]
1405 self.to_screen(
1406 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1407 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1408 ie_result['additional_entries'] = [
1409 self.extract_info(
1410 url, download, extra_info,
1411 force_generic_extractor=self.params.get('force_generic_extractor'))
1412 for url in additional_urls
1413 ]
1414 return ie_result
8222d8de
JMF
1415 elif result_type == 'url':
1416 # We have to add extra_info to the results because it may be
1417 # contained in a playlist
07cce701 1418 return self.extract_info(
1419 ie_result['url'], download,
1420 ie_key=ie_result.get('ie_key'),
1421 extra_info=extra_info)
7fc3fa05
PH
1422 elif result_type == 'url_transparent':
1423 # Use the information from the embedding page
1424 info = self.extract_info(
1425 ie_result['url'], ie_key=ie_result.get('ie_key'),
1426 extra_info=extra_info, download=False, process=False)
1427
1640eb09
S
1428 # extract_info may return None when ignoreerrors is enabled and
1429 # extraction failed with an error, don't crash and return early
1430 # in this case
1431 if not info:
1432 return info
1433
412c617d
PH
1434 force_properties = dict(
1435 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1436 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1437 if f in force_properties:
1438 del force_properties[f]
1439 new_result = info.copy()
1440 new_result.update(force_properties)
7fc3fa05 1441
0563f7ac
S
1442 # Extracted info may not be a video result (i.e.
1443 # info.get('_type', 'video') != video) but rather an url or
1444 # url_transparent. In such cases outer metadata (from ie_result)
1445 # should be propagated to inner one (info). For this to happen
1446 # _type of info should be overridden with url_transparent. This
067aa17e 1447 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1448 if new_result.get('_type') == 'url':
1449 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1450
1451 return self.process_ie_result(
1452 new_result, download=download, extra_info=extra_info)
40fcba5e 1453 elif result_type in ('playlist', 'multi_video'):
30a074c2 1454 # Protect from infinite recursion due to recursively nested playlists
1455 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1456 webpage_url = ie_result['webpage_url']
1457 if webpage_url in self._playlist_urls:
7e85e872 1458 self.to_screen(
30a074c2 1459 '[download] Skipping already downloaded playlist: %s'
1460 % (ie_result.get('title') or ie_result.get('id')))
1461 return
7e85e872 1462
30a074c2 1463 self._playlist_level += 1
1464 self._playlist_urls.add(webpage_url)
bc516a3f 1465 self._sanitize_thumbnails(ie_result)
30a074c2 1466 try:
1467 return self.__process_playlist(ie_result, download)
1468 finally:
1469 self._playlist_level -= 1
1470 if not self._playlist_level:
1471 self._playlist_urls.clear()
8222d8de 1472 elif result_type == 'compat_list':
c9bf4114
PH
1473 self.report_warning(
1474 'Extractor %s returned a compat_list result. '
1475 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1476
8222d8de 1477 def _fixup(r):
b868936c 1478 self.add_extra_info(r, {
1479 'extractor': ie_result['extractor'],
1480 'webpage_url': ie_result['webpage_url'],
1481 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1482 'extractor_key': ie_result['extractor_key'],
1483 })
8222d8de
JMF
1484 return r
1485 ie_result['entries'] = [
b6c45014 1486 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1487 for r in ie_result['entries']
1488 ]
1489 return ie_result
1490 else:
1491 raise Exception('Invalid result type: %s' % result_type)
1492
e92caff5 1493 def _ensure_dir_exists(self, path):
1494 return make_dir(path, self.report_error)
1495
30a074c2 1496 def __process_playlist(self, ie_result, download):
1497 # We process each entry in the playlist
1498 playlist = ie_result.get('title') or ie_result.get('id')
1499 self.to_screen('[download] Downloading playlist: %s' % playlist)
1500
498f5606 1501 if 'entries' not in ie_result:
1502 raise EntryNotInPlaylist()
1503 incomplete_entries = bool(ie_result.get('requested_entries'))
1504 if incomplete_entries:
1505 def fill_missing_entries(entries, indexes):
1506 ret = [None] * max(indexes)
1507 for i, entry in zip(indexes, entries):
1508 ret[i - 1] = entry
1509 return ret
1510 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1511
30a074c2 1512 playlist_results = []
1513
56a8fb4f 1514 playliststart = self.params.get('playliststart', 1)
30a074c2 1515 playlistend = self.params.get('playlistend')
1516 # For backwards compatibility, interpret -1 as whole list
1517 if playlistend == -1:
1518 playlistend = None
1519
1520 playlistitems_str = self.params.get('playlist_items')
1521 playlistitems = None
1522 if playlistitems_str is not None:
1523 def iter_playlistitems(format):
1524 for string_segment in format.split(','):
1525 if '-' in string_segment:
1526 start, end = string_segment.split('-')
1527 for item in range(int(start), int(end) + 1):
1528 yield int(item)
1529 else:
1530 yield int(string_segment)
1531 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1532
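# For example, a 'playlist_items' value of '1-3,7' is expanded by
# iter_playlistitems above to [1, 2, 3, 7]; orderedSet then drops duplicate
# indices while preserving order.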
1533 ie_entries = ie_result['entries']
56a8fb4f 1534 msg = (
1535 'Downloading %d videos' if not isinstance(ie_entries, list)
1536 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1537
1538 if isinstance(ie_entries, list):
1539 def get_entry(i):
1540 return ie_entries[i - 1]
1541 else:
1542 if not isinstance(ie_entries, PagedList):
1543 ie_entries = LazyList(ie_entries)
1544
1545 def get_entry(i):
1546 return YoutubeDL.__handle_extraction_exceptions(
1547 lambda self, i: ie_entries[i - 1]
1548 )(self, i)
50fed816 1549
56a8fb4f 1550 entries = []
ff1c7fc9 1551 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1552 for i in items:
1553 if i == 0:
1554 continue
56a8fb4f 1555 if playlistitems is None and playlistend is not None and playlistend < i:
1556 break
1557 entry = None
1558 try:
50fed816 1559 entry = get_entry(i)
56a8fb4f 1560 if entry is None:
498f5606 1561 raise EntryNotInPlaylist()
56a8fb4f 1562 except (IndexError, EntryNotInPlaylist):
1563 if incomplete_entries:
1564 raise EntryNotInPlaylist()
1565 elif not playlistitems:
1566 break
1567 entries.append(entry)
120fe513 1568 try:
1569 if entry is not None:
1570 self._match_entry(entry, incomplete=True, silent=True)
1571 except (ExistingVideoReached, RejectedVideoReached):
1572 break
56a8fb4f 1573 ie_result['entries'] = entries
30a074c2 1574
56a8fb4f 1575 # Save playlist_index before re-ordering
1576 entries = [
9e598870 1577 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1578 for i, entry in enumerate(entries, 1)
1579 if entry is not None]
1580 n_entries = len(entries)
498f5606 1581
498f5606 1582 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1583 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1584 ie_result['requested_entries'] = playlistitems
1585
49a57e70 1586 if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
498f5606 1587 ie_copy = {
1588 'playlist': playlist,
1589 'playlist_id': ie_result.get('id'),
1590 'playlist_title': ie_result.get('title'),
1591 'playlist_uploader': ie_result.get('uploader'),
1592 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1593 'playlist_index': 0,
49a57e70 1594 'n_entries': n_entries,
498f5606 1595 }
1596 ie_copy.update(dict(ie_result))
1597
80c03fa9 1598 if self._write_info_json('playlist', ie_result,
1599 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1600 return
1601 if self._write_description('playlist', ie_result,
1602 self.prepare_filename(ie_copy, 'pl_description')) is None:
1603 return
681de68e 1604 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1605 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1606
1607 if self.params.get('playlistreverse', False):
1608 entries = entries[::-1]
30a074c2 1609 if self.params.get('playlistrandom', False):
1610 random.shuffle(entries)
1611
1612 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1613
56a8fb4f 1614 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1615 failures = 0
1616 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1617 for i, entry_tuple in enumerate(entries, 1):
1618 playlist_index, entry = entry_tuple
81139999 1619 if 'playlist-index' in self.params.get('compat_opts', []):
1620 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1621 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1622 # This __x_forwarded_for_ip thing is a bit ugly but requires
1623 # minimal changes
1624 if x_forwarded_for:
1625 entry['__x_forwarded_for_ip'] = x_forwarded_for
1626 extra = {
1627 'n_entries': n_entries,
f59ae581 1628 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1629 'playlist_index': playlist_index,
1630 'playlist_autonumber': i,
30a074c2 1631 'playlist': playlist,
1632 'playlist_id': ie_result.get('id'),
1633 'playlist_title': ie_result.get('title'),
1634 'playlist_uploader': ie_result.get('uploader'),
1635 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1636 'extractor': ie_result['extractor'],
1637 'webpage_url': ie_result['webpage_url'],
1638 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1639 'extractor_key': ie_result['extractor_key'],
1640 }
1641
1642 if self._match_entry(entry, incomplete=True) is not None:
1643 continue
1644
1645 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1646 if not entry_result:
1647 failures += 1
1648 if failures >= max_failures:
1649 self.report_error(
1650 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1651 break
30a074c2 1652 # TODO: skip failed (empty) entries?
1653 playlist_results.append(entry_result)
1654 ie_result['entries'] = playlist_results
1655 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1656 return ie_result
1657
a0566bbf 1658 @__handle_extraction_exceptions
1659 def __process_iterable_entry(self, entry, download, extra_info):
1660 return self.process_ie_result(
1661 entry, download=download, extra_info=extra_info)
1662
67134eab
JMF
1663 def _build_format_filter(self, filter_spec):
1664 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1665
1666 OPERATORS = {
1667 '<': operator.lt,
1668 '<=': operator.le,
1669 '>': operator.gt,
1670 '>=': operator.ge,
1671 '=': operator.eq,
1672 '!=': operator.ne,
1673 }
67134eab 1674 operator_rex = re.compile(r'''(?x)\s*
187986a8 1675 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1676 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1677 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1678 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1679 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1680 if m:
1681 try:
1682 comparison_value = int(m.group('value'))
1683 except ValueError:
1684 comparison_value = parse_filesize(m.group('value'))
1685 if comparison_value is None:
1686 comparison_value = parse_filesize(m.group('value') + 'B')
1687 if comparison_value is None:
1688 raise ValueError(
1689 'Invalid value %r in format specification %r' % (
67134eab 1690 m.group('value'), filter_spec))
9ddb6925
S
1691 op = OPERATORS[m.group('op')]
1692
083c9df9 1693 if not m:
9ddb6925
S
1694 STR_OPERATORS = {
1695 '=': operator.eq,
10d33b34
YCH
1696 '^=': lambda attr, value: attr.startswith(value),
1697 '$=': lambda attr, value: attr.endswith(value),
1698 '*=': lambda attr, value: value in attr,
9ddb6925 1699 }
187986a8 1700 str_operator_rex = re.compile(r'''(?x)\s*
1701 (?P<key>[a-zA-Z0-9._-]+)\s*
1702 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1703 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1704 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1705 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1706 if m:
1707 comparison_value = m.group('value')
2cc779f4
S
1708 str_op = STR_OPERATORS[m.group('op')]
1709 if m.group('negation'):
e118a879 1710 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1711 else:
1712 op = str_op
083c9df9 1713
9ddb6925 1714 if not m:
187986a8 1715 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1716
1717 def _filter(f):
1718 actual_value = f.get(m.group('key'))
1719 if actual_value is None:
1720 return m.group('none_inclusive')
1721 return op(actual_value, comparison_value)
67134eab
JMF
1722 return _filter
1723
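# Examples of filter specs handled by _build_format_filter above (used as the
# bracketed part of a selector, e.g. 'best[height<=720]'):
#   height<=720        numeric comparison via OPERATORS
#   filesize>100M      the value is parsed with parse_filesize
#   format_id^=http    string prefix match via STR_OPERATORS
#   vcodec!*=av01      '!' negates a string comparison
#   tbr>?1000          a trailing '?' also keeps formats missing the field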
9f1a1c36 1724 def _check_formats(self, formats):
1725 for f in formats:
1726 self.to_screen('[info] Testing format %s' % f['format_id'])
1727 temp_file = tempfile.NamedTemporaryFile(
1728 suffix='.tmp', delete=False,
1729 dir=self.get_output_path('temp') or None)
1730 temp_file.close()
1731 try:
1732 success, _ = self.dl(temp_file.name, f, test=True)
1733 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1734 success = False
1735 finally:
1736 if os.path.exists(temp_file.name):
1737 try:
1738 os.remove(temp_file.name)
1739 except OSError:
1740 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1741 if success:
1742 yield f
1743 else:
1744 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1745
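# _check_formats above lazily test-downloads a small piece of each format
# (self.dl(..., test=True) into a temporary file) and yields only the formats
# that actually work; it is used when the 'check_formats' option is enabled.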
0017d9ad 1746 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1747
af0f7428
S
1748 def can_merge():
1749 merger = FFmpegMergerPP(self)
1750 return merger.available and merger.can_merge()
1751
91ebc640 1752 prefer_best = (
b7b04c78 1753 not self.params.get('simulate')
91ebc640 1754 and download
1755 and (
1756 not can_merge()
19807826 1757 or info_dict.get('is_live', False)
de6000d9 1758 or self.outtmpl_dict['default'] == '-'))
53ed7066 1759 compat = (
1760 prefer_best
1761 or self.params.get('allow_multiple_audio_streams', False)
1762 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1763
1764 return (
53ed7066 1765 'best/bestvideo+bestaudio' if prefer_best
1766 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1767 else 'bestvideo+bestaudio/best')
0017d9ad 1768
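# In short, _default_format_spec above falls back to pre-merged single files
# ('best/bestvideo+bestaudio') when downloading but merging is not possible or
# not sensible (no working ffmpeg merger, a live stream, or output to '-');
# otherwise it prefers 'bestvideo*+bestaudio/best', unless a compat option
# requests the older 'bestvideo+bestaudio/best' behaviour.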
67134eab
JMF
1769 def build_format_selector(self, format_spec):
1770 def syntax_error(note, start):
1771 message = (
1772 'Invalid format specification: '
1773 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1774 return SyntaxError(message)
1775
1776 PICKFIRST = 'PICKFIRST'
1777 MERGE = 'MERGE'
1778 SINGLE = 'SINGLE'
0130afb7 1779 GROUP = 'GROUP'
67134eab
JMF
1780 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1781
91ebc640 1782 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1783 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1784
9f1a1c36 1785 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1786
67134eab
JMF
1787 def _parse_filter(tokens):
1788 filter_parts = []
1789 for type, string, start, _, _ in tokens:
1790 if type == tokenize.OP and string == ']':
1791 return ''.join(filter_parts)
1792 else:
1793 filter_parts.append(string)
1794
232541df 1795 def _remove_unused_ops(tokens):
17cc1534 1796 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1797 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1798 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1799 last_string, last_start, last_end, last_line = None, None, None, None
1800 for type, string, start, end, line in tokens:
1801 if type == tokenize.OP and string == '[':
1802 if last_string:
1803 yield tokenize.NAME, last_string, last_start, last_end, last_line
1804 last_string = None
1805 yield type, string, start, end, line
1806 # everything inside brackets will be handled by _parse_filter
1807 for type, string, start, end, line in tokens:
1808 yield type, string, start, end, line
1809 if type == tokenize.OP and string == ']':
1810 break
1811 elif type == tokenize.OP and string in ALLOWED_OPS:
1812 if last_string:
1813 yield tokenize.NAME, last_string, last_start, last_end, last_line
1814 last_string = None
1815 yield type, string, start, end, line
1816 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1817 if not last_string:
1818 last_string = string
1819 last_start = start
1820 last_end = end
1821 else:
1822 last_string += string
1823 if last_string:
1824 yield tokenize.NAME, last_string, last_start, last_end, last_line
1825
cf2ac6df 1826 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1827 selectors = []
1828 current_selector = None
1829 for type, string, start, _, _ in tokens:
1830 # ENCODING is only defined in python 3.x
1831 if type == getattr(tokenize, 'ENCODING', None):
1832 continue
1833 elif type in [tokenize.NAME, tokenize.NUMBER]:
1834 current_selector = FormatSelector(SINGLE, string, [])
1835 elif type == tokenize.OP:
cf2ac6df
JMF
1836 if string == ')':
1837 if not inside_group:
1838 # ')' will be handled by the parentheses group
1839 tokens.restore_last_token()
67134eab 1840 break
cf2ac6df 1841 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1842 tokens.restore_last_token()
1843 break
cf2ac6df
JMF
1844 elif inside_choice and string == ',':
1845 tokens.restore_last_token()
1846 break
1847 elif string == ',':
0a31a350
JMF
1848 if not current_selector:
1849 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1850 selectors.append(current_selector)
1851 current_selector = None
1852 elif string == '/':
d96d604e
JMF
1853 if not current_selector:
1854 raise syntax_error('"/" must follow a format selector', start)
67134eab 1855 first_choice = current_selector
cf2ac6df 1856 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1857 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1858 elif string == '[':
1859 if not current_selector:
1860 current_selector = FormatSelector(SINGLE, 'best', [])
1861 format_filter = _parse_filter(tokens)
1862 current_selector.filters.append(format_filter)
0130afb7
JMF
1863 elif string == '(':
1864 if current_selector:
1865 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1866 group = _parse_format_selection(tokens, inside_group=True)
1867 current_selector = FormatSelector(GROUP, group, [])
67134eab 1868 elif string == '+':
d03cfdce 1869 if not current_selector:
1870 raise syntax_error('Unexpected "+"', start)
1871 selector_1 = current_selector
1872 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1873 if not selector_2:
1874 raise syntax_error('Expected a selector', start)
1875 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1876 else:
1877 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1878 elif type == tokenize.ENDMARKER:
1879 break
1880 if current_selector:
1881 selectors.append(current_selector)
1882 return selectors
1883
f8d4ad9a 1884 def _merge(formats_pair):
1885 format_1, format_2 = formats_pair
1886
1887 formats_info = []
1888 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1889 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1890
1891 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1892 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1893 for (i, fmt_info) in enumerate(formats_info):
551f9388 1894 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1895 formats_info.pop(i)
1896 continue
1897 for aud_vid in ['audio', 'video']:
f8d4ad9a 1898 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1899 if get_no_more[aud_vid]:
1900 formats_info.pop(i)
f5510afe 1901 break
f8d4ad9a 1902 get_no_more[aud_vid] = True
1903
1904 if len(formats_info) == 1:
1905 return formats_info[0]
1906
1907 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1908 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1909
1910 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1911 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1912
1913 output_ext = self.params.get('merge_output_format')
1914 if not output_ext:
1915 if the_only_video:
1916 output_ext = the_only_video['ext']
1917 elif the_only_audio and not video_fmts:
1918 output_ext = the_only_audio['ext']
1919 else:
1920 output_ext = 'mkv'
1921
975a0d0d 1922 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1923
f8d4ad9a 1924 new_dict = {
1925 'requested_formats': formats_info,
975a0d0d 1926 'format': '+'.join(filtered('format')),
1927 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 1928 'ext': output_ext,
975a0d0d 1929 'protocol': '+'.join(map(determine_protocol, formats_info)),
1930 'language': '+'.join(orderedSet(filtered('language'))),
1931 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1932 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1933 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 1934 }
1935
1936 if the_only_video:
1937 new_dict.update({
1938 'width': the_only_video.get('width'),
1939 'height': the_only_video.get('height'),
1940 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1941 'fps': the_only_video.get('fps'),
49a57e70 1942 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 1943 'vcodec': the_only_video.get('vcodec'),
1944 'vbr': the_only_video.get('vbr'),
1945 'stretched_ratio': the_only_video.get('stretched_ratio'),
1946 })
1947
1948 if the_only_audio:
1949 new_dict.update({
1950 'acodec': the_only_audio.get('acodec'),
1951 'abr': the_only_audio.get('abr'),
975a0d0d 1952 'asr': the_only_audio.get('asr'),
f8d4ad9a 1953 })
1954
1955 return new_dict
1956
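# _merge above combines the chosen formats into one synthetic format:
# 'format_id' values are joined with '+', the container comes from
# 'merge_output_format' (falling back to the only video/audio ext, else 'mkv'),
# and 'filesize_approx'/'tbr' are summed over the parts.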
e8e73840 1957 def _check_formats(formats):
981052c9 1958 if not check_formats:
1959 yield from formats
b5ac45b1 1960 return
9f1a1c36 1961 yield from self._check_formats(formats)
e8e73840 1962
67134eab 1963 def _build_selector_function(selector):
909d24dd 1964 if isinstance(selector, list): # ,
67134eab
JMF
1965 fs = [_build_selector_function(s) for s in selector]
1966
317f7ab6 1967 def selector_function(ctx):
67134eab 1968 for f in fs:
981052c9 1969 yield from f(ctx)
67134eab 1970 return selector_function
909d24dd 1971
1972 elif selector.type == GROUP: # ()
0130afb7 1973 selector_function = _build_selector_function(selector.selector)
909d24dd 1974
1975 elif selector.type == PICKFIRST: # /
67134eab
JMF
1976 fs = [_build_selector_function(s) for s in selector.selector]
1977
317f7ab6 1978 def selector_function(ctx):
67134eab 1979 for f in fs:
317f7ab6 1980 picked_formats = list(f(ctx))
67134eab
JMF
1981 if picked_formats:
1982 return picked_formats
1983 return []
67134eab 1984
981052c9 1985 elif selector.type == MERGE: # +
1986 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1987
1988 def selector_function(ctx):
1989 for pair in itertools.product(
1990 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1991 yield _merge(pair)
1992
909d24dd 1993 elif selector.type == SINGLE: # atom
598d185d 1994 format_spec = selector.selector or 'best'
909d24dd 1995
f8d4ad9a 1996 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1997 if format_spec == 'all':
1998 def selector_function(ctx):
981052c9 1999 yield from _check_formats(ctx['formats'])
f8d4ad9a 2000 elif format_spec == 'mergeall':
2001 def selector_function(ctx):
981052c9 2002 formats = list(_check_formats(ctx['formats']))
e01d6aa4 2003 if not formats:
2004 return
921b76ca 2005 merged_format = formats[-1]
2006 for f in formats[-2::-1]:
f8d4ad9a 2007 merged_format = _merge((merged_format, f))
2008 yield merged_format
909d24dd 2009
2010 else:
e8e73840 2011 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 2012 mobj = re.match(
2013 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2014 format_spec)
2015 if mobj is not None:
2016 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2017 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2018 format_type = (mobj.group('type') or [None])[0]
2019 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2020 format_modified = mobj.group('mod') is not None
909d24dd 2021
2022 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2023 _filter_f = (
eff63539 2024 (lambda f: f.get('%scodec' % format_type) != 'none')
2025 if format_type and format_modified # bv*, ba*, wv*, wa*
2026 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2027 if format_type # bv, ba, wv, wa
2028 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2029 if not format_modified # b, w
8326b00a 2030 else lambda f: True) # b*, w*
2031 filter_f = lambda f: _filter_f(f) and (
2032 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2033 else:
48ee10ee 2034 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2035 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2036 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2037 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
48ee10ee 2038 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2039 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2040 else:
b5ae35ee 2041 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2042
2043 def selector_function(ctx):
2044 formats = list(ctx['formats'])
909d24dd 2045 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 2046 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 2047 # for extractors with incomplete formats (audio only (soundcloud)
2048 # or video only (imgur)) best/worst will fallback to
2049 # best/worst {video,audio}-only format
e8e73840 2050 matches = formats
981052c9 2051 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2052 try:
e8e73840 2053 yield matches[format_idx - 1]
981052c9 2054 except IndexError:
2055 return
083c9df9 2056
67134eab 2057 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2058
317f7ab6
S
2059 def final_selector(ctx):
2060 ctx_copy = copy.deepcopy(ctx)
67134eab 2061 for _filter in filters:
317f7ab6
S
2062 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2063 return selector_function(ctx_copy)
67134eab 2064 return final_selector
083c9df9 2065
67134eab 2066 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2067 try:
232541df 2068 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2069 except tokenize.TokenError:
2070 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2071
2072 class TokenIterator(object):
2073 def __init__(self, tokens):
2074 self.tokens = tokens
2075 self.counter = 0
2076
2077 def __iter__(self):
2078 return self
2079
2080 def __next__(self):
2081 if self.counter >= len(self.tokens):
2082 raise StopIteration()
2083 value = self.tokens[self.counter]
2084 self.counter += 1
2085 return value
2086
2087 next = __next__
2088
2089 def restore_last_token(self):
2090 self.counter -= 1
2091
2092 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2093 return _build_selector_function(parsed_selector)
a9c58ad9 2094
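# Examples of format specs accepted by the selector grammar above:
#   'bv*+ba/b'                    best video* merged with best audio, else best
#   'best[height<=480]'           a single selector with a bracketed filter
#   '(bv+ba/b)[protocol^=http]'   a group whose result is then filtered
#   'all' / 'mergeall'            every format, or all formats merged into one
#   'bestaudio.2'                 the second best audio-only format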
e5660ee6
JMF
2095 def _calc_headers(self, info_dict):
2096 res = std_headers.copy()
2097
2098 add_headers = info_dict.get('http_headers')
2099 if add_headers:
2100 res.update(add_headers)
2101
2102 cookies = self._calc_cookies(info_dict)
2103 if cookies:
2104 res['Cookie'] = cookies
2105
0016b84e
S
2106 if 'X-Forwarded-For' not in res:
2107 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2108 if x_forwarded_for_ip:
2109 res['X-Forwarded-For'] = x_forwarded_for_ip
2110
e5660ee6
JMF
2111 return res
2112
2113 def _calc_cookies(self, info_dict):
5c2266df 2114 pr = sanitized_Request(info_dict['url'])
e5660ee6 2115 self.cookiejar.add_cookie_header(pr)
662435f7 2116 return pr.get_header('Cookie')
e5660ee6 2117
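# Header precedence in _calc_headers above: the global std_headers are
# overridden by the format's own 'http_headers', after which the Cookie header
# (from the cookiejar) and X-Forwarded-For are layered on top.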
9f1a1c36 2118 def _sort_thumbnails(self, thumbnails):
2119 thumbnails.sort(key=lambda t: (
2120 t.get('preference') if t.get('preference') is not None else -1,
2121 t.get('width') if t.get('width') is not None else -1,
2122 t.get('height') if t.get('height') is not None else -1,
2123 t.get('id') if t.get('id') is not None else '',
2124 t.get('url')))
2125
b0249bca 2126 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2127 thumbnails = info_dict.get('thumbnails')
2128 if thumbnails is None:
2129 thumbnail = info_dict.get('thumbnail')
2130 if thumbnail:
2131 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2132 if not thumbnails:
2133 return
2134
2135 def check_thumbnails(thumbnails):
2136 for t in thumbnails:
2137 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2138 try:
2139 self.urlopen(HEADRequest(t['url']))
2140 except network_exceptions as err:
2141 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2142 continue
2143 yield t
2144
2145 self._sort_thumbnails(thumbnails)
2146 for i, t in enumerate(thumbnails):
2147 if t.get('id') is None:
2148 t['id'] = '%d' % i
2149 if t.get('width') and t.get('height'):
2150 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2151 t['url'] = sanitize_url(t['url'])
2152
2153 if self.params.get('check_formats') is True:
2154 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2155 else:
2156 info_dict['thumbnails'] = thumbnails
bc516a3f 2157
dd82ffea
JMF
2158 def process_video_result(self, info_dict, download=True):
2159 assert info_dict.get('_type', 'video') == 'video'
2160
bec1fad2
PH
2161 if 'id' not in info_dict:
2162 raise ExtractorError('Missing "id" field in extractor result')
2163 if 'title' not in info_dict:
1151c407 2164 raise ExtractorError('Missing "title" field in extractor result',
2165 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2166
c9969434
S
2167 def report_force_conversion(field, field_not, conversion):
2168 self.report_warning(
2169 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2170 % (field, field_not, conversion))
2171
2172 def sanitize_string_field(info, string_field):
2173 field = info.get(string_field)
2174 if field is None or isinstance(field, compat_str):
2175 return
2176 report_force_conversion(string_field, 'a string', 'string')
2177 info[string_field] = compat_str(field)
2178
2179 def sanitize_numeric_fields(info):
2180 for numeric_field in self._NUMERIC_FIELDS:
2181 field = info.get(numeric_field)
2182 if field is None or isinstance(field, compat_numeric_types):
2183 continue
2184 report_force_conversion(numeric_field, 'numeric', 'int')
2185 info[numeric_field] = int_or_none(field)
2186
2187 sanitize_string_field(info_dict, 'id')
2188 sanitize_numeric_fields(info_dict)
be6217b2 2189
dd82ffea
JMF
2190 if 'playlist' not in info_dict:
2191 # It isn't part of a playlist
2192 info_dict['playlist'] = None
2193 info_dict['playlist_index'] = None
2194
bc516a3f 2195 self._sanitize_thumbnails(info_dict)
d5519808 2196
536a55da 2197 thumbnail = info_dict.get('thumbnail')
bc516a3f 2198 thumbnails = info_dict.get('thumbnails')
536a55da
S
2199 if thumbnail:
2200 info_dict['thumbnail'] = sanitize_url(thumbnail)
2201 elif thumbnails:
d5519808
PH
2202 info_dict['thumbnail'] = thumbnails[-1]['url']
2203
ae30b840 2204 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2205 info_dict['display_id'] = info_dict['id']
2206
239df021 2207 if info_dict.get('duration') is not None:
2208 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2209
10db0d2f 2210 for ts_key, date_key in (
2211 ('timestamp', 'upload_date'),
2212 ('release_timestamp', 'release_date'),
2213 ):
2214 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2215 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2216 # see http://bugs.python.org/issue1646728)
2217 try:
2218 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2219 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2220 except (ValueError, OverflowError, OSError):
2221 pass
9d2ecdbc 2222
ae30b840 2223 live_keys = ('is_live', 'was_live')
2224 live_status = info_dict.get('live_status')
2225 if live_status is None:
2226 for key in live_keys:
2227 if info_dict.get(key) is False:
2228 continue
2229 if info_dict.get(key):
2230 live_status = key
2231 break
2232 if all(info_dict.get(key) is False for key in live_keys):
2233 live_status = 'not_live'
2234 if live_status:
2235 info_dict['live_status'] = live_status
2236 for key in live_keys:
2237 if info_dict.get(key) is None:
2238 info_dict[key] = (live_status == key)
2239
33d2fc2f
S
2240 # Auto generate title fields corresponding to the *_number fields when missing
2241 # in order to always have clean titles. This is very common for TV series.
2242 for field in ('chapter', 'season', 'episode'):
2243 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2244 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2245
05108a49
S
2246 for cc_kind in ('subtitles', 'automatic_captions'):
2247 cc = info_dict.get(cc_kind)
2248 if cc:
2249 for _, subtitle in cc.items():
2250 for subtitle_format in subtitle:
2251 if subtitle_format.get('url'):
2252 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2253 if subtitle_format.get('ext') is None:
2254 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2255
2256 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2257 subtitles = info_dict.get('subtitles')
4bba3716 2258
360e1ca5 2259 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2260 info_dict['id'], subtitles, automatic_captions)
a504ced0 2261
dd82ffea
JMF
2262 if info_dict.get('formats') is None:
2263 # There's only one format available
2264 formats = [info_dict]
2265 else:
2266 formats = info_dict['formats']
2267
e0493e90 2268 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2269 if not self.params.get('allow_unplayable_formats'):
2270 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2271
db95dc13 2272 if not formats:
1151c407 2273 self.raise_no_formats(info_dict)
db95dc13 2274
73af5cc8
S
2275 def is_wellformed(f):
2276 url = f.get('url')
a5ac0c47 2277 if not url:
73af5cc8
S
2278 self.report_warning(
2279 '"url" field is missing or empty - skipping format, '
2280 'there is an error in extractor')
a5ac0c47
S
2281 return False
2282 if isinstance(url, bytes):
2283 sanitize_string_field(f, 'url')
2284 return True
73af5cc8
S
2285
2286 # Filter out malformed formats for better extraction robustness
2287 formats = list(filter(is_wellformed, formats))
2288
181c7053
S
2289 formats_dict = {}
2290
dd82ffea 2291 # We check that all the formats have the format and format_id fields
db95dc13 2292 for i, format in enumerate(formats):
c9969434
S
2293 sanitize_string_field(format, 'format_id')
2294 sanitize_numeric_fields(format)
dcf77cf1 2295 format['url'] = sanitize_url(format['url'])
e74e3b63 2296 if not format.get('format_id'):
8016c922 2297 format['format_id'] = compat_str(i)
e2effb08
S
2298 else:
2299 # Sanitize format_id from characters used in format selector expression
ec85ded8 2300 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2301 format_id = format['format_id']
2302 if format_id not in formats_dict:
2303 formats_dict[format_id] = []
2304 formats_dict[format_id].append(format)
2305
2306 # Make sure all formats have unique format_id
03b4de72 2307 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2308 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2309 ambiguous_id = len(ambiguous_formats) > 1
2310 for i, format in enumerate(ambiguous_formats):
2311 if ambiguous_id:
181c7053 2312 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2313 if format.get('ext') is None:
2314 format['ext'] = determine_ext(format['url']).lower()
2315 # Ensure there is no conflict between id and ext in format selection
2316 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2317 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2318 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2319
2320 for i, format in enumerate(formats):
8c51aa65 2321 if format.get('format') is None:
6febd1c1 2322 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2323 id=format['format_id'],
2324 res=self.format_resolution(format),
b868936c 2325 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2326 )
6f0be937 2327 if format.get('protocol') is None:
b5559424 2328 format['protocol'] = determine_protocol(format)
239df021 2329 if format.get('resolution') is None:
2330 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2331 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2332 format['dynamic_range'] = 'SDR'
f2fe69c7 2333 if (info_dict.get('duration') and format.get('tbr')
2334 and not format.get('filesize') and not format.get('filesize_approx')):
2335 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2336
e5660ee6
JMF
2337 # Add HTTP headers, so that external programs can use them from the
2338 # json output
2339 full_format_info = info_dict.copy()
2340 full_format_info.update(format)
2341 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2342 # Remove private housekeeping stuff
2343 if '__x_forwarded_for_ip' in info_dict:
2344 del info_dict['__x_forwarded_for_ip']
dd82ffea 2345
4bcc7bd1 2346 # TODO Central sorting goes here
99e206d5 2347
9f1a1c36 2348 if self.params.get('check_formats') is True:
2349 formats = LazyList(self._check_formats(formats[::-1])).reverse()
2350
88acdbc2 2351 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2352 # only set the 'formats' field if the original info_dict listed the formats;
2353 # otherwise we end up with a circular reference: the first (and unique)
f89197d7 2354 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2355 # which can't be exported to json
b3d9ef88 2356 info_dict['formats'] = formats
4ec82a72 2357
2358 info_dict, _ = self.pre_process(info_dict)
2359
b7b04c78 2360 if self.params.get('list_thumbnails'):
2361 self.list_thumbnails(info_dict)
2362 if self.params.get('listformats'):
86c66b2d 2363 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2364 self.to_screen('%s has no formats' % info_dict['id'])
2365 else:
2366 self.list_formats(info_dict)
b7b04c78 2367 if self.params.get('listsubtitles'):
2368 if 'automatic_captions' in info_dict:
2369 self.list_subtitles(
2370 info_dict['id'], automatic_captions, 'automatic captions')
2371 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2372 list_only = self.params.get('simulate') is None and (
2373 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2374 if list_only:
b7b04c78 2375 # Without this printing, -F --print-json will not work
169dbde9 2376 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2377 return
2378
187986a8 2379 format_selector = self.format_selector
2380 if format_selector is None:
0017d9ad 2381 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2382 self.write_debug('Default format spec: %s' % req_format)
187986a8 2383 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2384
2385 # While in format selection we may need to have an access to the original
2386 # format set in order to calculate some metrics or do some processing.
2387 # For now we need to be able to guess whether original formats provided
2388 # by extractor are incomplete or not (i.e. whether extractor provides only
2389 # video-only or audio-only formats) for proper formats selection for
2390 # extractors with such incomplete formats (see
067aa17e 2391 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2392 # Since formats may be filtered during format selection and may not match
2393 # the original formats the results may be incorrect. Thus original formats
2394 # or pre-calculated metrics should be passed to format selection routines
2395 # as well.
2396 # We will pass a context object containing all necessary additional data
2397 # instead of just formats.
2398 # This fixes incorrect format selection issue (see
067aa17e 2399 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2400 incomplete_formats = (
317f7ab6 2401 # All formats are video-only or
3089bc74 2402 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2403 # all formats are audio-only
3089bc74 2404 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2405
2406 ctx = {
2407 'formats': formats,
2408 'incomplete_formats': incomplete_formats,
2409 }
2410
2411 formats_to_download = list(format_selector(ctx))
dd82ffea 2412 if not formats_to_download:
b7da73eb 2413 if not self.params.get('ignore_no_formats_error'):
1151c407 2414 raise ExtractorError('Requested format is not available', expected=True,
2415 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2416 else:
2417 self.report_warning('Requested format is not available')
4513a41a
A
2418 # Process what we can, even without any available formats.
2419 self.process_info(dict(info_dict))
b7da73eb 2420 elif download:
2421 self.to_screen(
07cce701 2422 '[info] %s: Downloading %d format(s): %s' % (
2423 info_dict['id'], len(formats_to_download),
2424 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2425 for fmt in formats_to_download:
dd82ffea 2426 new_info = dict(info_dict)
4ec82a72 2427 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2428 new_info['__original_infodict'] = info_dict
b7da73eb 2429 new_info.update(fmt)
dd82ffea 2430 self.process_info(new_info)
49a57e70 2431 # We update the info dict with the selected best quality format (backwards compatibility)
b7da73eb 2432 if formats_to_download:
2433 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2434 return info_dict
2435
98c70d6f 2436 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2437 """Select the requested subtitles and their format"""
98c70d6f
JMF
2438 available_subs = {}
2439 if normal_subtitles and self.params.get('writesubtitles'):
2440 available_subs.update(normal_subtitles)
2441 if automatic_captions and self.params.get('writeautomaticsub'):
2442 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2443 if lang not in available_subs:
2444 available_subs[lang] = cap_info
2445
4d171848
JMF
2446 if (not self.params.get('writesubtitles') and not
2447 self.params.get('writeautomaticsub') or not
2448 available_subs):
2449 return None
a504ced0 2450
c32b0aab 2451 all_sub_langs = available_subs.keys()
a504ced0 2452 if self.params.get('allsubtitles', False):
c32b0aab 2453 requested_langs = all_sub_langs
2454 elif self.params.get('subtitleslangs', False):
77c4a9ef 2455 # A list is used so that the order of languages will be the same as
2456 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2457 requested_langs = []
2458 for lang_re in self.params.get('subtitleslangs'):
2459 if lang_re == 'all':
2460 requested_langs.extend(all_sub_langs)
c32b0aab 2461 continue
77c4a9ef 2462 discard = lang_re[0] == '-'
c32b0aab 2463 if discard:
77c4a9ef 2464 lang_re = lang_re[1:]
2465 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2466 if discard:
2467 for lang in current_langs:
77c4a9ef 2468 while lang in requested_langs:
2469 requested_langs.remove(lang)
c32b0aab 2470 else:
77c4a9ef 2471 requested_langs.extend(current_langs)
2472 requested_langs = orderedSet(requested_langs)
c32b0aab 2473 elif 'en' in available_subs:
2474 requested_langs = ['en']
a504ced0 2475 else:
c32b0aab 2476 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2477 if requested_langs:
2478 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2479
2480 formats_query = self.params.get('subtitlesformat', 'best')
2481 formats_preference = formats_query.split('/') if formats_query else []
2482 subs = {}
2483 for lang in requested_langs:
2484 formats = available_subs.get(lang)
2485 if formats is None:
2486 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2487 continue
a504ced0
JMF
2488 for ext in formats_preference:
2489 if ext == 'best':
2490 f = formats[-1]
2491 break
2492 matches = list(filter(lambda f: f['ext'] == ext, formats))
2493 if matches:
2494 f = matches[-1]
2495 break
2496 else:
2497 f = formats[-1]
2498 self.report_warning(
2499 'No subtitle format found matching "%s" for language %s, '
2500 'using %s' % (formats_query, lang, f['ext']))
2501 subs[lang] = f
2502 return subs
2503
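# Example of the 'subtitleslangs' handling above: a value such as
# ['en.*', '-en-GB'] first adds every language matching the regex 'en.*' and
# then discards 'en-GB'; the literal entry 'all' adds every available language.
# The subtitle format is then picked from 'subtitlesformat', e.g. 'vtt/srt/best'.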
d06daf23 2504 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2505 def print_mandatory(field, actual_field=None):
2506 if actual_field is None:
2507 actual_field = field
d06daf23 2508 if (self.params.get('force%s' % field, False)
53c18592 2509 and (not incomplete or info_dict.get(actual_field) is not None)):
2510 self.to_stdout(info_dict[actual_field])
d06daf23
S
2511
2512 def print_optional(field):
2513 if (self.params.get('force%s' % field, False)
2514 and info_dict.get(field) is not None):
2515 self.to_stdout(info_dict[field])
2516
53c18592 2517 info_dict = info_dict.copy()
2518 if filename is not None:
2519 info_dict['filename'] = filename
2520 if info_dict.get('requested_formats') is not None:
2521 # For RTMP URLs, also include the playpath
2522 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2523 elif 'url' in info_dict:
2524 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2525
2b8a2973 2526 if self.params.get('forceprint') or self.params.get('forcejson'):
2527 self.post_extract(info_dict)
53c18592 2528 for tmpl in self.params.get('forceprint', []):
b5ae35ee 2529 mobj = re.match(r'\w+(=?)$', tmpl)
2530 if mobj and mobj.group(1):
2531 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2532 elif mobj:
2533 tmpl = '%({})s'.format(tmpl)
2534 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
53c18592 2535
d06daf23
S
2536 print_mandatory('title')
2537 print_mandatory('id')
53c18592 2538 print_mandatory('url', 'urls')
d06daf23
S
2539 print_optional('thumbnail')
2540 print_optional('description')
53c18592 2541 print_optional('filename')
b868936c 2542 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2543 self.to_stdout(formatSeconds(info_dict['duration']))
2544 print_mandatory('format')
53c18592 2545
2b8a2973 2546 if self.params.get('forcejson'):
6e84b215 2547 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2548
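# The 'forceprint' templates handled above accept three forms: a bare field
# name such as 'title' (expanded to '%(title)s'), 'title=' (printed as
# 'title = <value>'), and a full output template such as '%(title)s - %(id)s',
# which is passed through unchanged.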
e8e73840 2549 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2550 if not info.get('url'):
1151c407 2551 self.raise_no_formats(info, True)
e8e73840 2552
2553 if test:
2554 verbose = self.params.get('verbose')
2555 params = {
2556 'test': True,
a169858f 2557 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2558 'verbose': verbose,
2559 'noprogress': not verbose,
2560 'nopart': True,
2561 'skip_unavailable_fragments': False,
2562 'keep_fragments': False,
2563 'overwrites': True,
2564 '_no_ytdl_file': True,
2565 }
2566 else:
2567 params = self.params
96fccc10 2568 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2569 if not test:
2570 for ph in self._progress_hooks:
2571 fd.add_progress_hook(ph)
18e674b4 2572 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2573 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2574
2575 new_info = copy.deepcopy(self._copy_infodict(info))
e8e73840 2576 if new_info.get('http_headers') is None:
2577 new_info['http_headers'] = self._calc_headers(new_info)
2578 return fd.download(name, new_info, subtitle)
2579
8222d8de
JMF
2580 def process_info(self, info_dict):
2581 """Process a single resolved IE result."""
2582
2583 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
2584
2585 max_downloads = self.params.get('max_downloads')
2586 if max_downloads is not None:
2587 if self._num_downloads >= int(max_downloads):
2588 raise MaxDownloadsReached()
8222d8de 2589
d06daf23 2590 # TODO: backward compatibility, to be removed
8222d8de 2591 info_dict['fulltitle'] = info_dict['title']
8222d8de 2592
4513a41a 2593 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2594 info_dict['format'] = info_dict['ext']
2595
c77495e3 2596 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2597 return
2598
277d6ff5 2599 self.post_extract(info_dict)
fd288278 2600 self._num_downloads += 1
8222d8de 2601
dcf64d43 2602 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2603 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2604 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2605 files_to_move = {}
8222d8de
JMF
2606
2607 # Forced printings
4513a41a 2608 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2609
b7b04c78 2610 if self.params.get('simulate'):
2d30509f 2611 if self.params.get('force_write_download_archive', False):
2612 self.record_download_archive(info_dict)
2d30509f 2613 # Do nothing else if in simulate mode
8222d8de
JMF
2614 return
2615
de6000d9 2616 if full_filename is None:
8222d8de 2617 return
e92caff5 2618 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2619 return
e92caff5 2620 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2621 return
2622
80c03fa9 2623 if self._write_description('video', info_dict,
2624 self.prepare_filename(info_dict, 'description')) is None:
2625 return
2626
2627 sub_files = self._write_subtitles(info_dict, temp_filename)
2628 if sub_files is None:
2629 return
2630 files_to_move.update(dict(sub_files))
2631
2632 thumb_files = self._write_thumbnails(
2633 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2634 if thumb_files is None:
2635 return
2636 files_to_move.update(dict(thumb_files))
8222d8de 2637
80c03fa9 2638 infofn = self.prepare_filename(info_dict, 'infojson')
2639 _infojson_written = self._write_info_json('video', info_dict, infofn)
2640 if _infojson_written:
2641 info_dict['__infojson_filename'] = infofn
2642 elif _infojson_written is None:
2643 return
2644
2645 # Note: Annotations are deprecated
2646 annofn = None
1fb07d10 2647 if self.params.get('writeannotations', False):
de6000d9 2648 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2649 if annofn:
e92caff5 2650 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2651 return
0c3d0f51 2652 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2653 self.to_screen('[info] Video annotations are already present')
ffddb112 RA 2654 elif not info_dict.get('annotations'):
2655 self.report_warning('There are no annotations to write.')
7b6fefc9 PH 2656 else:
2657 try:
6febd1c1 2658 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9 PH 2659 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2660 annofile.write(info_dict['annotations'])
2661 except (KeyError, TypeError):
6febd1c1 2662 self.report_warning('There are no annotations to write.')
7b6fefc9 2663 except (OSError, IOError):
6febd1c1 2664 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2665 return
1fb07d10 2666
732044af 2667 # Write internet shortcut files
2668 url_link = webloc_link = desktop_link = False
2669 if self.params.get('writelink', False):
2670 if sys.platform == "darwin": # macOS.
2671 webloc_link = True
2672 elif sys.platform.startswith("linux"):
2673 desktop_link = True
2674 else: # if sys.platform in ['win32', 'cygwin']:
2675 url_link = True
2676 if self.params.get('writeurllink', False):
2677 url_link = True
2678 if self.params.get('writewebloclink', False):
2679 webloc_link = True
2680 if self.params.get('writedesktoplink', False):
2681 desktop_link = True
2682
2683 if url_link or webloc_link or desktop_link:
2684 if 'webpage_url' not in info_dict:
2685 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2686 return
2687 ascii_url = iri_to_uri(info_dict['webpage_url'])
2688
2689 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2690 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2691 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2692 self.to_screen('[info] Internet shortcut is already present')
2693 else:
2694 try:
2695 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2696 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2697 template_vars = {'url': ascii_url}
2698 if embed_filename:
2699 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2700 linkfile.write(template % template_vars)
2701 except (OSError, IOError):
2702 self.report_error('Cannot write internet shortcut ' + linkfn)
2703 return False
2704 return True
2705
2706 if url_link:
2707 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2708 return
2709 if webloc_link:
2710 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2711 return
2712 if desktop_link:
2713 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2714 return
2715
56d868db 2716 try:
2717 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2718 except PostProcessingError as err:
2719 self.report_error('Preprocessing: %s' % str(err))
2720 return
2721
732044af 2722 must_record_download_archive = False
56d868db 2723 if self.params.get('skip_download', False):
2724 info_dict['filepath'] = temp_filename
2725 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2726 info_dict['__files_to_move'] = files_to_move
2727 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2728 else:
2729 # Download
b868936c 2730 info_dict.setdefault('__postprocessors', [])
4340deca 2731 try:
0202b52a 2732
6b591b29 2733 def existing_file(*filepaths):
2734 ext = info_dict.get('ext')
2735 final_ext = self.params.get('final_ext', ext)
2736 existing_files = []
2737 for file in orderedSet(filepaths):
2738 if final_ext != ext:
2739 converted = replace_extension(file, final_ext, ext)
2740 if os.path.exists(encodeFilename(converted)):
2741 existing_files.append(converted)
2742 if os.path.exists(encodeFilename(file)):
2743 existing_files.append(file)
2744
2745 if not existing_files or self.params.get('overwrites', False):
2746 for file in orderedSet(existing_files):
2747 self.report_file_delete(file)
2748 os.remove(encodeFilename(file))
2749 return None
2750
6b591b29 2751 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2752 return existing_files[0]
0202b52a 2753
2754 success = True
4340deca 2755 if info_dict.get('requested_formats') is not None:
81cd954a S 2756
2757 def compatible_formats(formats):
d03cfdce 2758 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2759 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2760 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2761 if len(video_formats) > 2 or len(audio_formats) > 2:
2762 return False
2763
81cd954a 2764 # Check extension
d03cfdce 2765 exts = set(format.get('ext') for format in formats)
2766 COMPATIBLE_EXTS = (
2767 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2768 set(('webm',)),
2769 )
2770 for ext_sets in COMPATIBLE_EXTS:
2771 if ext_sets.issuperset(exts):
2772 return True
81cd954a S 2773 # TODO: Check acodec/vcodec
2774 return False
2775
2776 requested_formats = info_dict['requested_formats']
0202b52a 2777 old_ext = info_dict['ext']
4e3b637d 2778 if self.params.get('merge_output_format') is None:
2779 if not compatible_formats(requested_formats):
2780 info_dict['ext'] = 'mkv'
2781 self.report_warning(
2782 'Requested formats are incompatible for merge and will be merged into mkv')
2783 if (info_dict['ext'] == 'webm'
2784 and info_dict.get('thumbnails')
2785 # check with type instead of pp_key, __name__, or isinstance
2786 # since we don't want any custom PPs to trigger this
2787 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2788 info_dict['ext'] = 'mkv'
2789 self.report_warning(
2790 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 2791 new_ext = info_dict['ext']
0202b52a 2792
124bc071 2793 def correct_ext(filename, ext=new_ext):
96fccc10 2794 if filename == '-':
2795 return filename
0202b52a 2796 filename_real_ext = os.path.splitext(filename)[1][1:]
2797 filename_wo_ext = (
2798 os.path.splitext(filename)[0]
124bc071 2799 if filename_real_ext in (old_ext, new_ext)
0202b52a 2800 else filename)
124bc071 2801 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2802
38c6902b 2803 # Ensure filename always has a correct extension for successful merge
0202b52a 2804 full_filename = correct_ext(full_filename)
2805 temp_filename = correct_ext(temp_filename)
2806 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2807 info_dict['__real_download'] = False
18e674b4 2808
dbf5416a 2809 if dl_filename is not None:
6c7274ec 2810 self.report_file_already_downloaded(dl_filename)
c111cefa 2811 elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
dbf5416a 2812 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2813 success, real_download = self.dl(temp_filename, info_dict)
2814 info_dict['__real_download'] = real_download
18e674b4 2815 else:
2816 downloaded = []
2817 merger = FFmpegMergerPP(self)
2818 if self.params.get('allow_unplayable_formats'):
2819 self.report_warning(
2820 'You have requested merging of multiple formats '
2821 'while also allowing unplayable formats to be downloaded. '
2822 'The formats won\'t be merged to prevent data corruption.')
2823 elif not merger.available:
2824 self.report_warning(
2825 'You have requested merging of multiple formats but ffmpeg is not installed. '
2826 'The formats won\'t be merged.')
2827
96fccc10 2828 if temp_filename == '-':
c111cefa 2829 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
96fccc10 2830 else 'but the formats are incompatible for simultaneous download' if merger.available
2831 else 'but ffmpeg is not installed')
2832 self.report_warning(
2833 f'You have requested downloading multiple formats to stdout {reason}. '
2834 'The formats will be streamed one after the other')
2835 fname = temp_filename
dbf5416a 2836 for f in requested_formats:
2837 new_info = dict(info_dict)
2838 del new_info['requested_formats']
2839 new_info.update(f)
96fccc10 2840 if temp_filename != '-':
124bc071 2841 fname = prepend_extension(
2842 correct_ext(temp_filename, new_info['ext']),
2843 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2844 if not self._ensure_dir_exists(fname):
2845 return
a21e0ab1 2846 f['filepath'] = fname
96fccc10 2847 downloaded.append(fname)
dbf5416a 2848 partial_success, real_download = self.dl(fname, new_info)
2849 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2850 success = success and partial_success
2851 if merger.available and not self.params.get('allow_unplayable_formats'):
2852 info_dict['__postprocessors'].append(merger)
2853 info_dict['__files_to_merge'] = downloaded
2854 # Even if there were no downloads, it is being merged only now
2855 info_dict['__real_download'] = True
2856 else:
2857 for file in downloaded:
2858 files_to_move[file] = None
4340deca P 2859 else:
2860 # Just a single file
0202b52a 2861 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2862 if dl_filename is None or dl_filename == temp_filename:
2863 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2864 # So we should try to resume the download
e8e73840 2865 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2866 info_dict['__real_download'] = real_download
6c7274ec 2867 else:
2868 self.report_file_already_downloaded(dl_filename)
0202b52a 2869
0202b52a 2870 dl_filename = dl_filename or temp_filename
c571435f 2871 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2872
3158150c 2873 except network_exceptions as err:
7960b056 2874 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca P 2875 return
2876 except (OSError, IOError) as err:
2877 raise UnavailableVideoError(err)
2878 except (ContentTooShortError, ) as err:
2879 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2880 return
8222d8de 2881
de6000d9 2882 if success and full_filename != '-':
f17f8651 2883
fd7cfb64 2884 def fixup():
2885 do_fixup = True
2886 fixup_policy = self.params.get('fixup')
2887 vid = info_dict['id']
2888
2889 if fixup_policy in ('ignore', 'never'):
2890 return
2891 elif fixup_policy == 'warn':
2892 do_fixup = False
f89b3e2d 2893 elif fixup_policy != 'force':
2894 assert fixup_policy in ('detect_or_warn', None)
2895 if not info_dict.get('__real_download'):
2896 do_fixup = False
fd7cfb64 2897
2898 def ffmpeg_fixup(cndn, msg, cls):
2899 if not cndn:
2900 return
2901 if not do_fixup:
2902 self.report_warning(f'{vid}: {msg}')
2903 return
2904 pp = cls(self)
2905 if pp.available:
2906 info_dict['__postprocessors'].append(pp)
2907 else:
2908 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2909
2910 stretched_ratio = info_dict.get('stretched_ratio')
2911 ffmpeg_fixup(
2912 stretched_ratio not in (1, None),
2913 f'Non-uniform pixel ratio {stretched_ratio}',
2914 FFmpegFixupStretchedPP)
2915
2916 ffmpeg_fixup(
2917 (info_dict.get('requested_formats') is None
2918 and info_dict.get('container') == 'm4a_dash'
2919 and info_dict.get('ext') == 'm4a'),
2920 'writing DASH m4a. Only some players support this container',
2921 FFmpegFixupM4aPP)
2922
993191c0 2923 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2924 downloader = downloader.__name__ if downloader else None
84726743 2925 ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2926 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2927 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2928 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2929
2930 fixup()
8222d8de 2931 try:
23c1a667 2932 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2933 except PostProcessingError as err:
2934 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2935 return
ab8e5e51 AM 2936 try:
2937 for ph in self._post_hooks:
23c1a667 2938 ph(info_dict['filepath'])
ab8e5e51 AM 2939 except Exception as err:
2940 self.report_error('post hooks: %s' % str(err))
2941 return
2d30509f 2942 must_record_download_archive = True
2943
2944 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2945 self.record_download_archive(info_dict)
c3e6ffba 2946 max_downloads = self.params.get('max_downloads')
2947 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2948 raise MaxDownloadsReached()
8222d8de JMF 2949
2950 def download(self, url_list):
2951 """Download a given list of URLs."""
de6000d9 2952 outtmpl = self.outtmpl_dict['default']
3089bc74 S 2953 if (len(url_list) > 1
2954 and outtmpl != '-'
2955 and '%' not in outtmpl
2956 and self.params.get('max_downloads') != 1):
acd69589 2957 raise SameFileError(outtmpl)
8222d8de JMF 2958
2959 for url in url_list:
2960 try:
5f6a1245 2961 # It also downloads the videos
61aa5ba3 S 2962 res = self.extract_info(
2963 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2964 except UnavailableVideoError:
6febd1c1 2965 self.report_error('unable to download video')
8222d8de 2966 except MaxDownloadsReached:
8f18aca8 2967 self.to_screen('[info] Maximum number of downloads reached')
8b0d7497 2968 raise
2969 except ExistingVideoReached:
8f18aca8 2970 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2971 raise
2972 except RejectedVideoReached:
8f18aca8 2973 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
8222d8de 2974 raise
63e0be34 PH 2975 else:
2976 if self.params.get('dump_single_json', False):
277d6ff5 2977 self.post_extract(res)
6e84b215 2978 self.to_stdout(json.dumps(self.sanitize_info(res)))
8222d8de JMF 2979
2980 return self._download_retcode
2981
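# A minimal embedding sketch of download(): pass a list of URLs and the method
# returns the internal download retcode. The options and URL below are assumed examples.
from yt_dlp import YoutubeDL

with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
    retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])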
1dcc4c0c 2982 def download_with_info_file(self, info_filename):
31bd3925 JMF 2983 with contextlib.closing(fileinput.FileInput(
2984 [info_filename], mode='r',
2985 openhook=fileinput.hook_encoded('utf-8'))) as f:
2986 # FileInput doesn't have a read method, so we can't call json.load
8012d892 2987 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 JMF 2988 try:
2989 self.process_ie_result(info, download=True)
d3f62c19 2990 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898 JMF 2991 webpage_url = info.get('webpage_url')
2992 if webpage_url is not None:
6febd1c1 2993 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898 JMF 2994 return self.download([webpage_url])
2995 else:
2996 raise
2997 return self._download_retcode
1dcc4c0c 2998
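# A hedged sketch of download_with_info_file(): re-run processing from a previously
# written .info.json file (the API counterpart of --load-info-json). The filename is
# an assumed example.
from yt_dlp import YoutubeDL

with YoutubeDL({'skip_download': True}) as ydl:
    ydl.download_with_info_file('example.info.json')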
cb202fd2 2999 @staticmethod
8012d892 3000 def sanitize_info(info_dict, remove_private_keys=False):
3001 ''' Sanitize the infodict for converting to json '''
3ad56b42 3002 if info_dict is None:
3003 return info_dict
6e84b215 3004 info_dict.setdefault('epoch', int(time.time()))
3005 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 3006 keep_keys = ['_type']  # Always keep this to facilitate load-info-json
8012d892 3007 if remove_private_keys:
6e84b215 3008 remove_keys |= {
3009 'requested_formats', 'requested_subtitles', 'requested_entries',
3010 'filepath', 'entries', 'original_url', 'playlist_autonumber',
3011 }
ae8f99e6 3012 empty_values = (None, {}, [], set(), tuple())
3013 reject = lambda k, v: k not in keep_keys and (
3014 k.startswith('_') or k in remove_keys or v in empty_values)
3015 else:
ae8f99e6 3016 reject = lambda k, v: k in remove_keys
5226731e 3017 filter_fn = lambda obj: (
b0249bca 3018 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 3019 else obj if not isinstance(obj, dict)
ae8f99e6 3020 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 3021 return filter_fn(info_dict)
cb202fd2 3022
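# A small sketch of sanitize_info(): it is a @staticmethod, so it can be called
# without an instance. With remove_private_keys=True it drops internal bookkeeping
# keys such as 'filepath' and keys starting with '_'. The dict below is an assumed example.
from yt_dlp import YoutubeDL

raw = {'id': 'xyz', 'title': 'example',
       'filepath': '/tmp/example.mp4', '__real_download': True}
clean = YoutubeDL.sanitize_info(raw, remove_private_keys=True)
assert 'filepath' not in clean and '__real_download' not in clean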
8012d892 3023 @staticmethod
3024 def filter_requested_info(info_dict, actually_filter=True):
3025 ''' Alias of sanitize_info for backward compatibility '''
3026 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3027
dcf64d43 3028 def run_pp(self, pp, infodict):
5bfa4862 3029 files_to_delete = []
dcf64d43 3030 if '__files_to_move' not in infodict:
3031 infodict['__files_to_move'] = {}
b1940459 3032 try:
3033 files_to_delete, infodict = pp.run(infodict)
3034 except PostProcessingError as e:
3035 # Must be True and not 'only_download'
3036 if self.params.get('ignoreerrors') is True:
3037 self.report_error(e)
3038 return infodict
3039 raise
3040
5bfa4862 3041 if not files_to_delete:
dcf64d43 3042 return infodict
5bfa4862 3043 if self.params.get('keepvideo', False):
3044 for f in files_to_delete:
dcf64d43 3045 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3046 else:
3047 for old_filename in set(files_to_delete):
3048 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3049 try:
3050 os.remove(encodeFilename(old_filename))
3051 except (IOError, OSError):
3052 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3053 if old_filename in infodict['__files_to_move']:
3054 del infodict['__files_to_move'][old_filename]
3055 return infodict
5bfa4862 3056
277d6ff5 3057 @staticmethod
3058 def post_extract(info_dict):
3059 def actual_post_extract(info_dict):
3060 if info_dict.get('_type') in ('playlist', 'multi_video'):
3061 for video_dict in info_dict.get('entries', {}):
b050d210 3062 actual_post_extract(video_dict or {})
277d6ff5 3063 return
3064
07cce701 3065 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 3066 extra = post_extractor().items()
3067 info_dict.update(extra)
07cce701 3068 info_dict.pop('__post_extractor', None)
277d6ff5 3069
4ec82a72 3070 original_infodict = info_dict.get('__original_infodict') or {}
3071 original_infodict.update(extra)
3072 original_infodict.pop('__post_extractor', None)
3073
b050d210 3074 actual_post_extract(info_dict or {})
277d6ff5 3075
56d868db 3076 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3077 info = dict(ie_info)
56d868db 3078 info['__files_to_move'] = files_to_move or {}
3079 for pp in self._pps[key]:
dcf64d43 3080 info = self.run_pp(pp, info)
56d868db 3081 return info, info.pop('__files_to_move', None)
5bfa4862 3082
dcf64d43 3083 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de JMF 3084 """Run all the postprocessors on the given file."""
3085 info = dict(ie_info)
3086 info['filepath'] = filename
dcf64d43 3087 info['__files_to_move'] = files_to_move or {}
0202b52a 3088
56d868db 3089 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3090 info = self.run_pp(pp, info)
3091 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3092 del info['__files_to_move']
56d868db 3093 for pp in self._pps['after_move']:
dcf64d43 3094 info = self.run_pp(pp, info)
23c1a667 3095 return info
c1c9a79c 3096
5db07df6 3097 def _make_archive_id(self, info_dict):
e9fef7ee S 3098 video_id = info_dict.get('id')
3099 if not video_id:
3100 return
5db07df6 PH 3101 # Future-proof against any change in case
3102 # and backwards compatibility with prior versions
e9fef7ee 3103 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3104 if extractor is None:
1211bb6d S 3105 url = str_or_none(info_dict.get('url'))
3106 if not url:
3107 return
e9fef7ee 3108 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3109 for ie_key, ie in self._ies.items():
1211bb6d 3110 if ie.suitable(url):
8b7491c8 3111 extractor = ie_key
e9fef7ee S 3112 break
3113 else:
3114 return
d0757229 3115 return '%s %s' % (extractor.lower(), video_id)
5db07df6 PH 3116
3117 def in_download_archive(self, info_dict):
3118 fn = self.params.get('download_archive')
3119 if fn is None:
3120 return False
3121
3122 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3123 if not vid_id:
7012b23c 3124 return False # Incomplete video information
5db07df6 3125
a45e8619 3126 return vid_id in self.archive
c1c9a79c PH 3127
3128 def record_download_archive(self, info_dict):
3129 fn = self.params.get('download_archive')
3130 if fn is None:
3131 return
5db07df6 PH 3132 vid_id = self._make_archive_id(info_dict)
3133 assert vid_id
c1c9a79c 3134 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3135 archive_file.write(vid_id + '\n')
a45e8619 3136 self.archive.add(vid_id)
dd82ffea 3137
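# A hedged illustration of the archive-id format used by the download archive above:
# one "<extractor> <id>" line per video. The values below are made up.
def make_archive_id(extractor_key, video_id):
    # mirrors '%s %s' % (extractor.lower(), video_id) from _make_archive_id
    return '%s %s' % (extractor_key.lower(), video_id)

print(make_archive_id('Youtube', 'dQw4w9WgXcQ'))  # -> "youtube dQw4w9WgXcQ"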
8c51aa65 3138 @staticmethod
8abeeb94 3139 def format_resolution(format, default='unknown'):
a903d828 3140 is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
9359f3d4 3141 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3142 return 'audio only'
f49d89ee PH 3143 if format.get('resolution') is not None:
3144 return format['resolution']
35615307 DA 3145 if format.get('width') and format.get('height'):
3146 res = '%dx%d' % (format['width'], format['height'])
3147 elif format.get('height'):
3148 res = '%sp' % format['height']
3149 elif format.get('width'):
388ae76b 3150 res = '%dx?' % format['width']
a903d828 3151 elif is_images:
3152 return 'images'
8c51aa65 3153 else:
a903d828 3154 return default
3155 return f'{res} images' if is_images else res
8c51aa65 3156
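# A quick sketch of format_resolution() on a few assumed format dicts; it is a
# @staticmethod, so no instance is needed.
from yt_dlp import YoutubeDL

print(YoutubeDL.format_resolution({'vcodec': 'none', 'acodec': 'mp4a.40.2'}))  # audio only
print(YoutubeDL.format_resolution({'width': 1920, 'height': 1080}))            # 1920x1080
print(YoutubeDL.format_resolution({'height': 720}))                            # 720p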
c57f7757 PH 3157 def _format_note(self, fdict):
3158 res = ''
3159 if fdict.get('ext') in ['f4f', 'f4m']:
3160 res += '(unsupported) '
32f90364 PH 3161 if fdict.get('language'):
3162 if res:
3163 res += ' '
9016d76f 3164 res += '[%s] ' % fdict['language']
c57f7757 PH 3165 if fdict.get('format_note') is not None:
3166 res += fdict['format_note'] + ' '
3167 if fdict.get('tbr') is not None:
3168 res += '%4dk ' % fdict['tbr']
3169 if fdict.get('container') is not None:
3170 if res:
3171 res += ', '
3172 res += '%s container' % fdict['container']
3089bc74 S 3173 if (fdict.get('vcodec') is not None
3174 and fdict.get('vcodec') != 'none'):
c57f7757 PH 3175 if res:
3176 res += ', '
3177 res += fdict['vcodec']
91c7271a 3178 if fdict.get('vbr') is not None:
c57f7757 PH 3179 res += '@'
3180 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3181 res += 'video@'
3182 if fdict.get('vbr') is not None:
3183 res += '%4dk' % fdict['vbr']
fbb21cf5 3184 if fdict.get('fps') is not None:
5d583bdf S 3185 if res:
3186 res += ', '
3187 res += '%sfps' % fdict['fps']
c57f7757 PH 3188 if fdict.get('acodec') is not None:
3189 if res:
3190 res += ', '
3191 if fdict['acodec'] == 'none':
3192 res += 'video only'
3193 else:
3194 res += '%-5s' % fdict['acodec']
3195 elif fdict.get('abr') is not None:
3196 if res:
3197 res += ', '
3198 res += 'audio'
3199 if fdict.get('abr') is not None:
3200 res += '@%3dk' % fdict['abr']
3201 if fdict.get('asr') is not None:
3202 res += ' (%5dHz)' % fdict['asr']
3203 if fdict.get('filesize') is not None:
3204 if res:
3205 res += ', '
3206 res += format_bytes(fdict['filesize'])
9732d77e PH 3207 elif fdict.get('filesize_approx') is not None:
3208 if res:
3209 res += ', '
3210 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3211 return res
91c7271a 3212
ec11a9f4 3213 def _list_format_headers(self, *headers):
3214 if self.params.get('listformats_table', True) is not False:
3215 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3216 return headers
3217
c57f7757 3218 def list_formats(self, info_dict):
94badb25 3219 formats = info_dict.get('formats', [info_dict])
ec11a9f4 3220 new_format = self.params.get('listformats_table', True) is not False
76d321f6 3221 if new_format:
ec11a9f4 3222 tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
3223 vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
3224 abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
3225 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
76d321f6 3226 table = [
3227 [
ec11a9f4 3228 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
76d321f6 3229 format_field(f, 'ext'),
3230 self.format_resolution(f),
3231 format_field(f, 'fps', '%d'),
176f1866 3232 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
ec11a9f4 3233 delim,
76d321f6 3234 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
ec11a9f4 3235 format_field(f, 'tbr', f'%{tbr_digits}dk'),
52a8a1e1 3236 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
ec11a9f4 3237 delim,
76d321f6 3238 format_field(f, 'vcodec', default='unknown').replace('none', ''),
ec11a9f4 3239 format_field(f, 'vbr', f'%{vbr_digits}dk'),
76d321f6 3240 format_field(f, 'acodec', default='unknown').replace('none', ''),
ec11a9f4 3241 format_field(f, 'abr', f'%{abr_digits}dk'),
76d321f6 3242 format_field(f, 'asr', '%5dHz'),
3f698246 3243 ', '.join(filter(None, (
ec11a9f4 3244 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '',
3f698246 3245 format_field(f, 'language', '[%s]'),
3246 format_field(f, 'format_note'),
3247 format_field(f, 'container', ignore=(None, f.get('ext'))),
ea05b302 3248 ))),
3f698246 3249 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
ec11a9f4 3250 header_line = self._list_format_headers(
3251 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO',
3252 delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
76d321f6 3253 else:
3254 table = [
3255 [
3256 format_field(f, 'format_id'),
3257 format_field(f, 'ext'),
3258 self.format_resolution(f),
3259 self._format_note(f)]
3260 for f in formats
3261 if f.get('preference') is None or f['preference'] >= -1000]
3262 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3263
cfb56d1a 3264 self.to_screen(
169dbde9 3265 '[info] Available formats for %s:' % info_dict['id'])
3266 self.to_stdout(render_table(
ec11a9f4 3267 header_line, table,
3268 extraGap=(0 if new_format else 1),
3269 hideEmpty=new_format,
3270 delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
cfb56d1a PH 3271
3272 def list_thumbnails(self, info_dict):
b0249bca 3273 thumbnails = list(info_dict.get('thumbnails'))
cfb56d1a 3274 if not thumbnails:
b7b72db9 3275 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3276 return
cfb56d1a PH 3277
3278 self.to_screen(
3279 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3280 self.to_stdout(render_table(
ec11a9f4 3281 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
cfb56d1a 3282 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3283
360e1ca5 3284 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3285 if not subtitles:
360e1ca5 3286 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3287 return
a504ced0 3288 self.to_screen(
edab9dbf 3289 'Available %s for %s:' % (name, video_id))
2412044c 3290
3291 def _row(lang, formats):
49c258e1 3292 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3293 if len(set(names)) == 1:
7aee40c1 3294 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3295 return [lang, ', '.join(names), ', '.join(exts)]
3296
169dbde9 3297 self.to_stdout(render_table(
ec11a9f4 3298 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3299 [_row(lang, formats) for lang, formats in subtitles.items()],
3300 hideEmpty=True))
a504ced0 3301
dca08720 PH 3302 def urlopen(self, req):
3303 """ Start an HTTP download """
82d8a8b6 3304 if isinstance(req, compat_basestring):
67dda517 3305 req = sanitized_Request(req)
19a41fc6 3306 return self._opener.open(req, timeout=self._socket_timeout)
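# A hedged sketch of urlopen(): it accepts either a URL string or a Request object
# and goes through the opener configured by YoutubeDL (proxies, cookies, timeout).
# The URL is an assumed example.
from yt_dlp import YoutubeDL

with YoutubeDL({}) as ydl:
    page = ydl.urlopen('https://example.com/robots.txt').read()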
dca08720 PH 3307
3308 def print_debug_header(self):
3309 if not self.params.get('verbose'):
3310 return
49a57e70 3311
3312 def get_encoding(stream):
3313 ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3314 if not supports_terminal_sequences(stream):
3315 ret += ' (No ANSI)'
3316 return ret
3317
3318 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3319 locale.getpreferredencoding(),
3320 sys.getfilesystemencoding(),
3321 get_encoding(self._screen_file), get_encoding(self._err_file),
3322 self.get_encoding())
883d4b1e 3323
3324 logger = self.params.get('logger')
3325 if logger:
3326 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3327 write_debug(encoding_str)
3328 else:
96565c7e 3329 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3330 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3331
4c88ff87 3332 source = detect_variant()
49a57e70 3333 write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
6e21fdd2 3334 if not _LAZY_LOADER:
3335 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3336 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3337 else:
49a57e70 3338 write_debug('Lazy loading extractors is disabled')
3ae5e797 3339 if plugin_extractors or plugin_postprocessors:
49a57e70 3340 write_debug('Plugins: %s' % [
3ae5e797 3341 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3342 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3343 if self.params.get('compat_opts'):
49a57e70 3344 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
dca08720 3345 try:
d3c93ec2 3346 sp = Popen(
dca08720 PH 3347 ['git', 'rev-parse', '--short', 'HEAD'],
3348 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3349 cwd=os.path.dirname(os.path.abspath(__file__)))
d3c93ec2 3350 out, err = sp.communicate_or_kill()
dca08720 PH 3351 out = out.decode().strip()
3352 if re.match('[0-9a-f]+', out):
49a57e70 3353 write_debug('Git HEAD: %s' % out)
70a1165b 3354 except Exception:
dca08720 PH 3355 try:
3356 sys.exc_clear()
70a1165b 3357 except Exception:
dca08720 3358 pass
b300cda4 S 3359
3360 def python_implementation():
3361 impl_name = platform.python_implementation()
3362 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3363 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3364 return impl_name
3365
49a57e70 3366 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3367 platform.python_version(),
3368 python_implementation(),
3369 platform.architecture()[0],
b300cda4 3370 platform_name()))
d28b5171 3371
73fac4e9 3372 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3373 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3374 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3375 exe_str = ', '.join(
2831b468 3376 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3377 ) or 'none'
49a57e70 3378 write_debug('exe versions: %s' % exe_str)
dca08720 3379
2831b468 3380 from .downloader.websocket import has_websockets
3381 from .postprocessor.embedthumbnail import has_mutagen
3382 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3383
ad3dc496 3384 lib_str = ', '.join(sorted(filter(None, (
edf65256 3385 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
2831b468 3386 has_websockets and 'websockets',
3387 has_mutagen and 'mutagen',
3388 SQLITE_AVAILABLE and 'sqlite',
3389 KEYRING_AVAILABLE and 'keyring',
ad3dc496 3390 )))) or 'none'
49a57e70 3391 write_debug('Optional libraries: %s' % lib_str)
2831b468 3392
dca08720 PH 3393 proxy_map = {}
3394 for handler in self._opener.handlers:
3395 if hasattr(handler, 'proxies'):
3396 proxy_map.update(handler.proxies)
49a57e70 3397 write_debug(f'Proxy map: {proxy_map}')
dca08720 3398
49a57e70 3399 # Not implemented
3400 if False and self.params.get('call_home'):
58b1f00d 3401 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3402 write_debug('Public IP address: %s' % ipaddr)
58b1f00d PH 3403 latest_version = self.urlopen(
3404 'https://yt-dl.org/latest/version').read().decode('utf-8')
3405 if version_tuple(latest_version) > version_tuple(__version__):
3406 self.report_warning(
3407 'You are using an outdated version (newest version: %s)! '
3408 'See https://yt-dl.org/update if you need help updating.' %
3409 latest_version)
3410
e344693b 3411 def _setup_opener(self):
6ad14cab 3412 timeout_val = self.params.get('socket_timeout')
17bddf3e 3413 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3414
982ee69a 3415 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720 PH 3416 opts_cookiefile = self.params.get('cookiefile')
3417 opts_proxy = self.params.get('proxy')
3418
982ee69a 3419 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3420
6a3f4c3f 3421 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720 PH 3422 if opts_proxy is not None:
3423 if opts_proxy == '':
3424 proxies = {}
3425 else:
3426 proxies = {'http': opts_proxy, 'https': opts_proxy}
3427 else:
3428 proxies = compat_urllib_request.getproxies()
067aa17e 3429 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720 PH 3430 if 'http' in proxies and 'https' not in proxies:
3431 proxies['https'] = proxies['http']
91410c9b 3432 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2 PH 3433
3434 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d PH 3435 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3436 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3437 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3438 data_handler = compat_urllib_request_DataHandler()
6240b0a2 JMF 3439
3440 # When passing our own FileHandler instance, build_opener won't add the
3441 # default FileHandler and allows us to disable the file protocol, which
3442 # can be used for malicious purposes (see
067aa17e 3443 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2 JMF 3444 file_handler = compat_urllib_request.FileHandler()
3445
3446 def file_open(*args, **kwargs):
7a5c1cfe 3447 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2 JMF 3448 file_handler.file_open = file_open
3449
3450 opener = compat_urllib_request.build_opener(
fca6dba8 3451 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3452
dca08720 PH 3453 # Delete the default user-agent header, which would otherwise apply in
3454 # cases where our custom HTTP handler doesn't come into play
067aa17e 3455 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720 PH 3456 opener.addheaders = []
3457 self._opener = opener
62fec3b2 PH 3458
3459 def encode(self, s):
3460 if isinstance(s, bytes):
3461 return s # Already encoded
3462
3463 try:
3464 return s.encode(self.get_encoding())
3465 except UnicodeEncodeError as err:
3466 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3467 raise
3468
3469 def get_encoding(self):
3470 encoding = self.params.get('encoding')
3471 if encoding is None:
3472 encoding = preferredencoding()
3473 return encoding
ec82d85a 3474
80c03fa9 3475 def _write_info_json(self, label, ie_result, infofn):
3476 ''' Write infojson and returns True = written, False = skip, None = error '''
3477 if not self.params.get('writeinfojson'):
3478 return False
3479 elif not infofn:
3480 self.write_debug(f'Skipping writing {label} infojson')
3481 return False
3482 elif not self._ensure_dir_exists(infofn):
3483 return None
3484 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3485 self.to_screen(f'[info] {label.title()} metadata is already present')
3486 else:
3487 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3488 try:
3489 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3490 except (OSError, IOError):
3491 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3492 return None
3493 return True
3494
3495 def _write_description(self, label, ie_result, descfn):
3496 ''' Write description and returns True = written, False = skip, None = error '''
3497 if not self.params.get('writedescription'):
3498 return False
3499 elif not descfn:
3500 self.write_debug(f'Skipping writing {label} description')
3501 return False
3502 elif not self._ensure_dir_exists(descfn):
3503 return None
3504 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3505 self.to_screen(f'[info] {label.title()} description is already present')
3506 elif ie_result.get('description') is None:
3507 self.report_warning(f'There\'s no {label} description to write')
3508 return False
3509 else:
3510 try:
3511 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3512 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3513 descfile.write(ie_result['description'])
3514 except (OSError, IOError):
3515 self.report_error(f'Cannot write {label} description file {descfn}')
3516 return None
3517 return True
3518
3519 def _write_subtitles(self, info_dict, filename):
3520 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3521 ret = []
3522 subtitles = info_dict.get('requested_subtitles')
3523 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3524 # subtitle download errors are already handled as problems in the relevant IE,
3525 # so this silently carries on when used with an IE that does not support them
3526 return ret
3527
3528 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3529 if not sub_filename_base:
3530 self.to_screen('[info] Skipping writing video subtitles')
3531 return ret
3532 for sub_lang, sub_info in subtitles.items():
3533 sub_format = sub_info['ext']
3534 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3535 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3536 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3537 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3538 sub_info['filepath'] = sub_filename
3539 ret.append((sub_filename, sub_filename_final))
3540 continue
3541
3542 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3543 if sub_info.get('data') is not None:
3544 try:
3545 # Use newline='' to prevent conversion of newline characters
3546 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3547 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3548 subfile.write(sub_info['data'])
3549 sub_info['filepath'] = sub_filename
3550 ret.append((sub_filename, sub_filename_final))
3551 continue
3552 except (OSError, IOError):
3553 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3554 return None
3555
3556 try:
3557 sub_copy = sub_info.copy()
3558 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3559 self.dl(sub_filename, sub_copy, subtitle=True)
3560 sub_info['filepath'] = sub_filename
3561 ret.append((sub_filename, sub_filename_final))
3562 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3563 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3564 continue
519804a9 3565 return ret
80c03fa9 3566
3567 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3568 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3569 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3570 thumbnails, ret = [], []
6c4fd172 3571 if write_all or self.params.get('writethumbnail', False):
0202b52a 3572 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3573 multiple = write_all and len(thumbnails) > 1
ec82d85a 3574
80c03fa9 3575 if thumb_filename_base is None:
3576 thumb_filename_base = filename
3577 if thumbnails and not thumb_filename_base:
3578 self.write_debug(f'Skipping writing {label} thumbnail')
3579 return ret
3580
981052c9 3581 for t in thumbnails[::-1]:
80c03fa9 3582 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3583 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3584 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3585 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3586
80c03fa9 3587 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3588 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3589 t['filepath'] = thumb_filename
80c03fa9 3590 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
ec82d85a 3591 else:
80c03fa9 3592 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a PH 3593 try:
3594 uf = self.urlopen(t['url'])
80c03fa9 3595 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3596 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3597 shutil.copyfileobj(uf, thumbf)
80c03fa9 3598 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3599 t['filepath'] = thumb_filename
3158150c 3600 except network_exceptions as err:
80c03fa9 3601 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3602 if ret and not write_all:
3603 break
0202b52a 3604 return ret
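# A closing, hedged sketch of the options that drive the writer helpers above
# (_write_description, _write_info_json, _write_subtitles, _write_thumbnails);
# the option values and URL are assumed examples.
from yt_dlp import YoutubeDL

opts = {
    'writedescription': True,
    'writeinfojson': True,
    'writesubtitles': True,
    'writethumbnail': True,   # or 'write_all_thumbnails': True for every thumbnail
}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])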