#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    LazyList,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    process_communicate_or_kill,
    register_socks_protocols,
    RejectedVideoReached,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    TERMINAL_SEQUENCES,
    ThrottledDownload,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
158 """YoutubeDL class.
159
160 YoutubeDL objects are the ones responsible of downloading the
161 actual video file and writing it to disk if the user has requested
162 it, among some other tasks. In most cases there should be one per
163 program. As, given a video URL, the downloader doesn't know how to
164 extract all the needed information, task that InfoExtractors do, it
165 has to pass the URL to one of them.
166
167 For this, YoutubeDL objects have a method that allows
168 InfoExtractors to be registered in a given order. When it is passed
169 a URL, the YoutubeDL object handles it to the first InfoExtractor it
170 finds that reports being able to handle it. The InfoExtractor extracts
171 all the information about the video or videos the URL refers to, and
172 YoutubeDL process the extracted information, possibly using a File
173 Downloader to download the video.
174
175 YoutubeDL objects accept a lot of parameters. In order not to saturate
176 the object constructor with arguments, it receives a dictionary of
177 options instead. These options are available through the params
178 attribute for the InfoExtractors to use. The YoutubeDL also
179 registers itself as the downloader in charge for the InfoExtractors
180 that are added to it, so this is a "mutual registration".
181
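
    A minimal usage sketch (both the URL and the options shown are only
    illustrative):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])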

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none)
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with the following entries:
                       * key: The name of the postprocessor. See
                              yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                              pre_process|before_dl|post_process|after_move.
                              Assumed to be 'post_process' if not given
    post_hooks:        A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, continuedl, noprogress,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
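
    As a rough illustration only (not an exhaustive or canonical example), a few
    of the options above could be combined like this; the hook function and the
    postprocessor entry are merely examples:

        def my_progress_hook(d):
            # 'status' is one of "downloading", "error" or "finished"
            if d['status'] == 'finished':
                print('Done downloading, now post-processing ...')

        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
            'progress_hooks': [my_progress_hook],
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
            }],
        }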
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options."""
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        if self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        elif self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
668 """
669 Get an instance of an IE with name ie_key, it will try to get one from
670 the _ies list, if there's no instance it will create a new one and add
671 it to the extractor list.
672 """
673 ie = self._ies_instances.get(ie_key)
674 if ie is None:
675 ie = get_info_extractor(ie_key)()
676 self.add_info_extractor(ie)
677 return ie
678
023fa8c4
JMF
679 def add_default_info_extractors(self):
680 """
681 Add the InfoExtractors returned by gen_extractors to the end of the list
682 """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
780 """Determine action to take when a download problem appears.
781
782 Depending on if the downloader has been configured to ignore
783 download errors or not, this method may throw an exception or
784 not when errors are found, after printing the message.
785
786 tb, if given, is additional traceback information.
787 """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    def _color_text(self, text, color):
        if self.params.get('no_color'):
            return text
        return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        outtmpl_dict.update({
            k: v for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
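
        # A few illustrative template strings accepted by the syntax above
        # (the field names are only examples, not an exhaustive list):
        #   '%(title)s-%(id)s.%(ext)s'      -> plain field substitution
        #   '%(duration>%H-%M-%S)s'         -> strftime-style formatting after '>'
        #   '%(view_count|unknown)s'        -> fallback default after '|'
        #   '%(uploader,creator)s'          -> alternate field after ','
        #   '%(playlist_index+1)d'          -> simple '+'/'-' arithmetic on numeric fields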

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
            outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
            filename = outtmpl % template_dict

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dictionary of the video or playlist
        the URL refers to.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
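
        Example (a rough sketch; the URL is illustrative and the available
        fields depend on the extractor):

            info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc',
                                    download=False)
            print(info.get('title'))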
1224 """
fe7e0c98 1225
409e1828 1226 if extra_info is None:
1227 extra_info = {}
1228
61aa5ba3 1229 if not ie_key and force_generic_extractor:
d22dec74
S
1230 ie_key = 'Generic'
1231
8222d8de 1232 if ie_key:
8b7491c8 1233 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1234 else:
1235 ies = self._ies
1236
8b7491c8 1237 for ie_key, ie in ies.items():
8222d8de
JMF
1238 if not ie.suitable(url):
1239 continue
1240
1241 if not ie.working():
6febd1c1
PH
1242 self.report_warning('The program functionality for this site has been marked as broken, '
1243 'and will probably not work.')
8222d8de 1244
1151c407 1245 temp_id = ie.get_temp_id(url)
a0566bbf 1246 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1247 self.to_screen("[%s] %s: has already been recorded in archive" % (
1248 ie_key, temp_id))
1249 break
8b7491c8 1250 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1251 else:
1252 self.report_error('no suitable InfoExtractor for URL %s' % url)
1253
8e5fecc8 1254 def __handle_extraction_exceptions(func):
1255
a0566bbf 1256 def wrapper(self, *args, **kwargs):
1257 try:
1258 return func(self, *args, **kwargs)
773f291d
S
1259 except GeoRestrictedError as e:
1260 msg = e.msg
1261 if e.countries:
1262 msg += '\nThis video is available in %s.' % ', '.join(
1263 map(ISO3166Utils.short2full, e.countries))
1264 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1265 self.report_error(msg)
fb043a6e 1266 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1267 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1268 except ThrottledDownload:
1269 self.to_stderr('\r')
1270 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1271 return wrapper(self, *args, **kwargs)
8e5fecc8 1272 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
d3e5bbf4 1273 raise
8222d8de 1274 except Exception as e:
b1940459 1275 if self.params.get('ignoreerrors'):
9b9c5355 1276 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1277 else:
1278 raise
a0566bbf 1279 return wrapper
1280
1281 @__handle_extraction_exceptions
58f197b7 1282 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1283 ie_result = ie.extract(url)
1284 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1285 return
1286 if isinstance(ie_result, list):
1287 # Backwards compatibility: old IE result format
1288 ie_result = {
1289 '_type': 'compat_list',
1290 'entries': ie_result,
1291 }
e37d0efb 1292 if extra_info.get('original_url'):
1293 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1294 self.add_default_extra_info(ie_result, ie, url)
1295 if process:
1296 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1297 else:
a0566bbf 1298 return ie_result
fe7e0c98 1299
ea38e55f 1300 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1301 if url is not None:
1302 self.add_extra_info(ie_result, {
1303 'webpage_url': url,
1304 'original_url': url,
1305 'webpage_url_basename': url_basename(url),
1306 })
1307 if ie is not None:
1308 self.add_extra_info(ie_result, {
1309 'extractor': ie.IE_NAME,
1310 'extractor_key': ie.ie_key(),
1311 })
ea38e55f 1312
58adec46 1313 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1314 """
1315 Take the result of the ie (may be modified) and resolve all unresolved
1316 references (URLs, playlist items).
1317
1318 It will also download the videos if 'download' is True.
1319 Returns the resolved ie_result.
1320 """
58adec46 1321 if extra_info is None:
1322 extra_info = {}
e8ee972c
PH
1323 result_type = ie_result.get('_type', 'video')
1324
057a5206 1325 if result_type in ('url', 'url_transparent'):
134c6ea8 1326 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1327 if ie_result.get('original_url'):
1328 extra_info.setdefault('original_url', ie_result['original_url'])
1329
057a5206 1330 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1331 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1332 or extract_flat is True):
ecb54191 1333 info_copy = ie_result.copy()
6033d980 1334 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1335 if ie and not ie_result.get('id'):
4614bc22 1336 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1337 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1338 self.add_extra_info(info_copy, extra_info)
ecb54191 1339 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1340 if self.params.get('force_write_download_archive', False):
1341 self.record_download_archive(info_copy)
e8ee972c
PH
1342 return ie_result
1343
8222d8de 1344 if result_type == 'video':
b6c45014 1345 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1346 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1347 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1348 if additional_urls:
e9f4ccd1 1349 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1350 if isinstance(additional_urls, compat_str):
1351 additional_urls = [additional_urls]
1352 self.to_screen(
1353 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1354 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1355 ie_result['additional_entries'] = [
1356 self.extract_info(
1357 url, download, extra_info,
1358 force_generic_extractor=self.params.get('force_generic_extractor'))
1359 for url in additional_urls
1360 ]
1361 return ie_result
8222d8de
JMF
1362 elif result_type == 'url':
1363 # We have to add extra_info to the results because it may be
1364 # contained in a playlist
07cce701 1365 return self.extract_info(
1366 ie_result['url'], download,
1367 ie_key=ie_result.get('ie_key'),
1368 extra_info=extra_info)
7fc3fa05
PH
1369 elif result_type == 'url_transparent':
1370 # Use the information from the embedding page
1371 info = self.extract_info(
1372 ie_result['url'], ie_key=ie_result.get('ie_key'),
1373 extra_info=extra_info, download=False, process=False)
1374
1640eb09
S
1375 # extract_info may return None when ignoreerrors is enabled and
1376 # extraction failed with an error, don't crash and return early
1377 # in this case
1378 if not info:
1379 return info
1380
412c617d
PH
1381 force_properties = dict(
1382 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1383 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1384 if f in force_properties:
1385 del force_properties[f]
1386 new_result = info.copy()
1387 new_result.update(force_properties)
7fc3fa05 1388
0563f7ac
S
1389 # Extracted info may not be a video result (i.e.
1390 # info.get('_type', 'video') != 'video') but rather a url or
1391 # url_transparent. In such cases outer metadata (from ie_result)
1392 # should be propagated to the inner one (info). For this to happen,
1393 # _type of info should be overridden with url_transparent. This
067aa17e 1394 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1395 if new_result.get('_type') == 'url':
1396 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1397
1398 return self.process_ie_result(
1399 new_result, download=download, extra_info=extra_info)
40fcba5e 1400 elif result_type in ('playlist', 'multi_video'):
30a074c2 1401 # Protect from infinite recursion due to recursively nested playlists
1402 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1403 webpage_url = ie_result['webpage_url']
1404 if webpage_url in self._playlist_urls:
7e85e872 1405 self.to_screen(
30a074c2 1406 '[download] Skipping already downloaded playlist: %s'
1407 % (ie_result.get('title') or ie_result.get('id')))
1408 return
7e85e872 1409
30a074c2 1410 self._playlist_level += 1
1411 self._playlist_urls.add(webpage_url)
bc516a3f 1412 self._sanitize_thumbnails(ie_result)
30a074c2 1413 try:
1414 return self.__process_playlist(ie_result, download)
1415 finally:
1416 self._playlist_level -= 1
1417 if not self._playlist_level:
1418 self._playlist_urls.clear()
8222d8de 1419 elif result_type == 'compat_list':
c9bf4114
PH
1420 self.report_warning(
1421 'Extractor %s returned a compat_list result. '
1422 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1423
8222d8de 1424 def _fixup(r):
b868936c 1425 self.add_extra_info(r, {
1426 'extractor': ie_result['extractor'],
1427 'webpage_url': ie_result['webpage_url'],
1428 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1429 'extractor_key': ie_result['extractor_key'],
1430 })
8222d8de
JMF
1431 return r
1432 ie_result['entries'] = [
b6c45014 1433 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1434 for r in ie_result['entries']
1435 ]
1436 return ie_result
1437 else:
1438 raise Exception('Invalid result type: %s' % result_type)
1439
e92caff5 1440 def _ensure_dir_exists(self, path):
1441 return make_dir(path, self.report_error)
1442
30a074c2 1443 def __process_playlist(self, ie_result, download):
1444 # We process each entry in the playlist
1445 playlist = ie_result.get('title') or ie_result.get('id')
1446 self.to_screen('[download] Downloading playlist: %s' % playlist)
1447
498f5606 1448 if 'entries' not in ie_result:
1449 raise EntryNotInPlaylist()
1450 incomplete_entries = bool(ie_result.get('requested_entries'))
1451 if incomplete_entries:
1452 def fill_missing_entries(entries, indexes):
1453 ret = [None] * max(indexes)
1454 for i, entry in zip(indexes, entries):
1455 ret[i - 1] = entry
1456 return ret
1457 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
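# e.g. entries=[e1, e2] with requested_entries=[2, 5] is filled out to
# [None, e1, None, None, e2] so positions match the original playlist indices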
02fd60d3 1458
30a074c2 1459 playlist_results = []
1460
56a8fb4f 1461 playliststart = self.params.get('playliststart', 1)
30a074c2 1462 playlistend = self.params.get('playlistend')
1463 # For backwards compatibility, interpret -1 as whole list
1464 if playlistend == -1:
1465 playlistend = None
1466
1467 playlistitems_str = self.params.get('playlist_items')
1468 playlistitems = None
1469 if playlistitems_str is not None:
1470 def iter_playlistitems(format):
1471 for string_segment in format.split(','):
1472 if '-' in string_segment:
1473 start, end = string_segment.split('-')
1474 for item in range(int(start), int(end) + 1):
1475 yield int(item)
1476 else:
1477 yield int(string_segment)
1478 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
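# e.g. --playlist-items '1-3,7' yields [1, 2, 3, 7] from the parsing above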
1479
1480 ie_entries = ie_result['entries']
56a8fb4f 1481 msg = (
1482 'Downloading %d videos' if not isinstance(ie_entries, list)
1483 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1484
1485 if isinstance(ie_entries, list):
1486 def get_entry(i):
1487 return ie_entries[i - 1]
1488 else:
1489 if not isinstance(ie_entries, PagedList):
1490 ie_entries = LazyList(ie_entries)
1491
1492 def get_entry(i):
1493 return YoutubeDL.__handle_extraction_exceptions(
1494 lambda self, i: ie_entries[i - 1]
1495 )(self, i)
50fed816 1496
56a8fb4f 1497 entries = []
ff1c7fc9 1498 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1499 for i in items:
1500 if i == 0:
1501 continue
56a8fb4f 1502 if playlistitems is None and playlistend is not None and playlistend < i:
1503 break
1504 entry = None
1505 try:
50fed816 1506 entry = get_entry(i)
56a8fb4f 1507 if entry is None:
498f5606 1508 raise EntryNotInPlaylist()
56a8fb4f 1509 except (IndexError, EntryNotInPlaylist):
1510 if incomplete_entries:
1511 raise EntryNotInPlaylist()
1512 elif not playlistitems:
1513 break
1514 entries.append(entry)
120fe513 1515 try:
1516 if entry is not None:
1517 self._match_entry(entry, incomplete=True, silent=True)
1518 except (ExistingVideoReached, RejectedVideoReached):
1519 break
56a8fb4f 1520 ie_result['entries'] = entries
30a074c2 1521
56a8fb4f 1522 # Save playlist_index before re-ordering
1523 entries = [
9e598870 1524 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1525 for i, entry in enumerate(entries, 1)
1526 if entry is not None]
1527 n_entries = len(entries)
498f5606 1528
498f5606 1529 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1530 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1531 ie_result['requested_entries'] = playlistitems
1532
1533 if self.params.get('allow_playlist_files', True):
1534 ie_copy = {
1535 'playlist': playlist,
1536 'playlist_id': ie_result.get('id'),
1537 'playlist_title': ie_result.get('title'),
1538 'playlist_uploader': ie_result.get('uploader'),
1539 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1540 'playlist_index': 0,
498f5606 1541 }
1542 ie_copy.update(dict(ie_result))
1543
80c03fa9 1544 if self._write_info_json('playlist', ie_result,
1545 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1546 return
1547 if self._write_description('playlist', ie_result,
1548 self.prepare_filename(ie_copy, 'pl_description')) is None:
1549 return
681de68e 1550 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1551 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1552
1553 if self.params.get('playlistreverse', False):
1554 entries = entries[::-1]
30a074c2 1555 if self.params.get('playlistrandom', False):
1556 random.shuffle(entries)
1557
1558 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1559
56a8fb4f 1560 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1561 failures = 0
1562 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1563 for i, entry_tuple in enumerate(entries, 1):
1564 playlist_index, entry = entry_tuple
81139999 1565 if 'playlist-index' in self.params.get('compat_opts', []):
1566 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1567 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1568 # This __x_forwarded_for_ip thing is a bit ugly but requires
1569 # minimal changes
1570 if x_forwarded_for:
1571 entry['__x_forwarded_for_ip'] = x_forwarded_for
1572 extra = {
1573 'n_entries': n_entries,
f59ae581 1574 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1575 'playlist_index': playlist_index,
1576 'playlist_autonumber': i,
30a074c2 1577 'playlist': playlist,
1578 'playlist_id': ie_result.get('id'),
1579 'playlist_title': ie_result.get('title'),
1580 'playlist_uploader': ie_result.get('uploader'),
1581 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1582 'extractor': ie_result['extractor'],
1583 'webpage_url': ie_result['webpage_url'],
1584 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1585 'extractor_key': ie_result['extractor_key'],
1586 }
1587
1588 if self._match_entry(entry, incomplete=True) is not None:
1589 continue
1590
1591 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1592 if not entry_result:
1593 failures += 1
1594 if failures >= max_failures:
1595 self.report_error(
1596 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1597 break
30a074c2 1598 # TODO: skip failed (empty) entries?
1599 playlist_results.append(entry_result)
1600 ie_result['entries'] = playlist_results
1601 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1602 return ie_result
1603
a0566bbf 1604 @__handle_extraction_exceptions
1605 def __process_iterable_entry(self, entry, download, extra_info):
1606 return self.process_ie_result(
1607 entry, download=download, extra_info=extra_info)
1608
67134eab
JMF
1609 def _build_format_filter(self, filter_spec):
1610 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1611
1612 OPERATORS = {
1613 '<': operator.lt,
1614 '<=': operator.le,
1615 '>': operator.gt,
1616 '>=': operator.ge,
1617 '=': operator.eq,
1618 '!=': operator.ne,
1619 }
67134eab 1620 operator_rex = re.compile(r'''(?x)\s*
187986a8 1621 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1622 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1623 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1624 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
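# Illustrative numeric filter specs matched by this regex: 'filesize>100M',
# 'height<=720' or 'fps>=?60' (a '?' after the operator also keeps formats
# where the field is unknown)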
187986a8 1625 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1626 if m:
1627 try:
1628 comparison_value = int(m.group('value'))
1629 except ValueError:
1630 comparison_value = parse_filesize(m.group('value'))
1631 if comparison_value is None:
1632 comparison_value = parse_filesize(m.group('value') + 'B')
1633 if comparison_value is None:
1634 raise ValueError(
1635 'Invalid value %r in format specification %r' % (
67134eab 1636 m.group('value'), filter_spec))
9ddb6925
S
1637 op = OPERATORS[m.group('op')]
1638
083c9df9 1639 if not m:
9ddb6925
S
1640 STR_OPERATORS = {
1641 '=': operator.eq,
10d33b34
YCH
1642 '^=': lambda attr, value: attr.startswith(value),
1643 '$=': lambda attr, value: attr.endswith(value),
1644 '*=': lambda attr, value: value in attr,
9ddb6925 1645 }
187986a8 1646 str_operator_rex = re.compile(r'''(?x)\s*
1647 (?P<key>[a-zA-Z0-9._-]+)\s*
1648 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1649 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1650 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
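# Illustrative string filter specs: 'ext=m4a', 'format_id^=hls' or the negated
# 'protocol!*=dash' (a '!' before the operator inverts the match)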
187986a8 1651 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1652 if m:
1653 comparison_value = m.group('value')
2cc779f4
S
1654 str_op = STR_OPERATORS[m.group('op')]
1655 if m.group('negation'):
e118a879 1656 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1657 else:
1658 op = str_op
083c9df9 1659
9ddb6925 1660 if not m:
187986a8 1661 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1662
1663 def _filter(f):
1664 actual_value = f.get(m.group('key'))
1665 if actual_value is None:
1666 return m.group('none_inclusive')
1667 return op(actual_value, comparison_value)
67134eab
JMF
1668 return _filter
1669
0017d9ad 1670 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1671
af0f7428
S
1672 def can_merge():
1673 merger = FFmpegMergerPP(self)
1674 return merger.available and merger.can_merge()
1675
91ebc640 1676 prefer_best = (
b7b04c78 1677 not self.params.get('simulate')
91ebc640 1678 and download
1679 and (
1680 not can_merge()
19807826 1681 or info_dict.get('is_live', False)
de6000d9 1682 or self.outtmpl_dict['default'] == '-'))
53ed7066 1683 compat = (
1684 prefer_best
1685 or self.params.get('allow_multiple_audio_streams', False)
1686 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1687
1688 return (
53ed7066 1689 'best/bestvideo+bestaudio' if prefer_best
1690 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1691 else 'bestvideo+bestaudio/best')
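# Roughly: 'best/bestvideo+bestaudio' when merging is not possible (e.g. no
# working FFmpeg, live stream, or output to stdout); otherwise
# 'bestvideo*+bestaudio/best' unless a compat option requests the old behaviour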
0017d9ad 1692
67134eab
JMF
1693 def build_format_selector(self, format_spec):
1694 def syntax_error(note, start):
1695 message = (
1696 'Invalid format specification: '
1697 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1698 return SyntaxError(message)
1699
1700 PICKFIRST = 'PICKFIRST'
1701 MERGE = 'MERGE'
1702 SINGLE = 'SINGLE'
0130afb7 1703 GROUP = 'GROUP'
67134eab
JMF
1704 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1705
91ebc640 1706 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1707 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1708
e8e73840 1709 check_formats = self.params.get('check_formats')
1710
67134eab
JMF
1711 def _parse_filter(tokens):
1712 filter_parts = []
1713 for type, string, start, _, _ in tokens:
1714 if type == tokenize.OP and string == ']':
1715 return ''.join(filter_parts)
1716 else:
1717 filter_parts.append(string)
1718
232541df 1719 def _remove_unused_ops(tokens):
17cc1534 1720 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1721 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1722 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1723 last_string, last_start, last_end, last_line = None, None, None, None
1724 for type, string, start, end, line in tokens:
1725 if type == tokenize.OP and string == '[':
1726 if last_string:
1727 yield tokenize.NAME, last_string, last_start, last_end, last_line
1728 last_string = None
1729 yield type, string, start, end, line
1730 # everything inside brackets will be handled by _parse_filter
1731 for type, string, start, end, line in tokens:
1732 yield type, string, start, end, line
1733 if type == tokenize.OP and string == ']':
1734 break
1735 elif type == tokenize.OP and string in ALLOWED_OPS:
1736 if last_string:
1737 yield tokenize.NAME, last_string, last_start, last_end, last_line
1738 last_string = None
1739 yield type, string, start, end, line
1740 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1741 if not last_string:
1742 last_string = string
1743 last_start = start
1744 last_end = end
1745 else:
1746 last_string += string
1747 if last_string:
1748 yield tokenize.NAME, last_string, last_start, last_end, last_line
1749
cf2ac6df 1750 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1751 selectors = []
1752 current_selector = None
1753 for type, string, start, _, _ in tokens:
1754 # ENCODING is only defined in python 3.x
1755 if type == getattr(tokenize, 'ENCODING', None):
1756 continue
1757 elif type in [tokenize.NAME, tokenize.NUMBER]:
1758 current_selector = FormatSelector(SINGLE, string, [])
1759 elif type == tokenize.OP:
cf2ac6df
JMF
1760 if string == ')':
1761 if not inside_group:
1762 # ')' will be handled by the parentheses group
1763 tokens.restore_last_token()
67134eab 1764 break
cf2ac6df 1765 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1766 tokens.restore_last_token()
1767 break
cf2ac6df
JMF
1768 elif inside_choice and string == ',':
1769 tokens.restore_last_token()
1770 break
1771 elif string == ',':
0a31a350
JMF
1772 if not current_selector:
1773 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1774 selectors.append(current_selector)
1775 current_selector = None
1776 elif string == '/':
d96d604e
JMF
1777 if not current_selector:
1778 raise syntax_error('"/" must follow a format selector', start)
67134eab 1779 first_choice = current_selector
cf2ac6df 1780 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1781 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1782 elif string == '[':
1783 if not current_selector:
1784 current_selector = FormatSelector(SINGLE, 'best', [])
1785 format_filter = _parse_filter(tokens)
1786 current_selector.filters.append(format_filter)
0130afb7
JMF
1787 elif string == '(':
1788 if current_selector:
1789 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1790 group = _parse_format_selection(tokens, inside_group=True)
1791 current_selector = FormatSelector(GROUP, group, [])
67134eab 1792 elif string == '+':
d03cfdce 1793 if not current_selector:
1794 raise syntax_error('Unexpected "+"', start)
1795 selector_1 = current_selector
1796 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1797 if not selector_2:
1798 raise syntax_error('Expected a selector', start)
1799 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1800 else:
1801 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1802 elif type == tokenize.ENDMARKER:
1803 break
1804 if current_selector:
1805 selectors.append(current_selector)
1806 return selectors
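# Rough sketch: 'bv*+ba/b' parses into a PICKFIRST of (MERGE(bv*, ba), b)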
1807
f8d4ad9a 1808 def _merge(formats_pair):
1809 format_1, format_2 = formats_pair
1810
1811 formats_info = []
1812 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1813 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1814
1815 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1816 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1817 for (i, fmt_info) in enumerate(formats_info):
551f9388 1818 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1819 formats_info.pop(i)
1820 continue
1821 for aud_vid in ['audio', 'video']:
f8d4ad9a 1822 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1823 if get_no_more[aud_vid]:
1824 formats_info.pop(i)
f5510afe 1825 break
f8d4ad9a 1826 get_no_more[aud_vid] = True
1827
1828 if len(formats_info) == 1:
1829 return formats_info[0]
1830
1831 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1832 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1833
1834 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1835 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1836
1837 output_ext = self.params.get('merge_output_format')
1838 if not output_ext:
1839 if the_only_video:
1840 output_ext = the_only_video['ext']
1841 elif the_only_audio and not video_fmts:
1842 output_ext = the_only_audio['ext']
1843 else:
1844 output_ext = 'mkv'
1845
1846 new_dict = {
1847 'requested_formats': formats_info,
1848 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1849 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1850 'ext': output_ext,
1851 }
1852
1853 if the_only_video:
1854 new_dict.update({
1855 'width': the_only_video.get('width'),
1856 'height': the_only_video.get('height'),
1857 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1858 'fps': the_only_video.get('fps'),
1859 'vcodec': the_only_video.get('vcodec'),
1860 'vbr': the_only_video.get('vbr'),
1861 'stretched_ratio': the_only_video.get('stretched_ratio'),
1862 })
1863
1864 if the_only_audio:
1865 new_dict.update({
1866 'acodec': the_only_audio.get('acodec'),
1867 'abr': the_only_audio.get('abr'),
1868 })
1869
1870 return new_dict
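# The merged pseudo-format keeps its components under 'requested_formats' and
# joins their ids, e.g. '137+140' (illustrative); the output extension comes
# from merge_output_format, the single video/audio format, or falls back to mkv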
1871
e8e73840 1872 def _check_formats(formats):
981052c9 1873 if not check_formats:
1874 yield from formats
b5ac45b1 1875 return
e8e73840 1876 for f in formats:
1877 self.to_screen('[info] Testing format %s' % f['format_id'])
21cd8fae 1878 temp_file = tempfile.NamedTemporaryFile(
1879 suffix='.tmp', delete=False,
1880 dir=self.get_output_path('temp') or None)
1881 temp_file.close()
fe346461 1882 try:
981052c9 1883 success, _ = self.dl(temp_file.name, f, test=True)
1884 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1885 success = False
fe346461 1886 finally:
21cd8fae 1887 if os.path.exists(temp_file.name):
1888 try:
1889 os.remove(temp_file.name)
1890 except OSError:
1891 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
981052c9 1892 if success:
e8e73840 1893 yield f
1894 else:
1895 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1896
67134eab 1897 def _build_selector_function(selector):
909d24dd 1898 if isinstance(selector, list): # ,
67134eab
JMF
1899 fs = [_build_selector_function(s) for s in selector]
1900
317f7ab6 1901 def selector_function(ctx):
67134eab 1902 for f in fs:
981052c9 1903 yield from f(ctx)
67134eab 1904 return selector_function
909d24dd 1905
1906 elif selector.type == GROUP: # ()
0130afb7 1907 selector_function = _build_selector_function(selector.selector)
909d24dd 1908
1909 elif selector.type == PICKFIRST: # /
67134eab
JMF
1910 fs = [_build_selector_function(s) for s in selector.selector]
1911
317f7ab6 1912 def selector_function(ctx):
67134eab 1913 for f in fs:
317f7ab6 1914 picked_formats = list(f(ctx))
67134eab
JMF
1915 if picked_formats:
1916 return picked_formats
1917 return []
67134eab 1918
981052c9 1919 elif selector.type == MERGE: # +
1920 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1921
1922 def selector_function(ctx):
1923 for pair in itertools.product(
1924 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1925 yield _merge(pair)
1926
909d24dd 1927 elif selector.type == SINGLE: # atom
598d185d 1928 format_spec = selector.selector or 'best'
909d24dd 1929
f8d4ad9a 1930 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1931 if format_spec == 'all':
1932 def selector_function(ctx):
981052c9 1933 yield from _check_formats(ctx['formats'])
f8d4ad9a 1934 elif format_spec == 'mergeall':
1935 def selector_function(ctx):
981052c9 1936 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1937 if not formats:
1938 return
921b76ca 1939 merged_format = formats[-1]
1940 for f in formats[-2::-1]:
f8d4ad9a 1941 merged_format = _merge((merged_format, f))
1942 yield merged_format
909d24dd 1943
1944 else:
e8e73840 1945 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1946 mobj = re.match(
1947 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1948 format_spec)
1949 if mobj is not None:
1950 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1951 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1952 format_type = (mobj.group('type') or [None])[0]
1953 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1954 format_modified = mobj.group('mod') is not None
909d24dd 1955
1956 format_fallback = not format_type and not format_modified # for b, w
8326b00a 1957 _filter_f = (
eff63539 1958 (lambda f: f.get('%scodec' % format_type) != 'none')
1959 if format_type and format_modified # bv*, ba*, wv*, wa*
1960 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1961 if format_type # bv, ba, wv, wa
1962 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1963 if not format_modified # b, w
8326b00a 1964 else lambda f: True) # b*, w*
1965 filter_f = lambda f: _filter_f(f) and (
1966 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
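# Sketch of the selectors above: 'bv'/'wv' keep video-only formats, 'ba'/'wa'
# audio-only, plain 'b'/'w' prefer combined audio+video streams, 'b*'/'w*'
# accept anything with at least one stream, and 'bv*'/'ba*' only require the
# named stream to be present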
67134eab 1967 else:
b11c04a8 1968 if format_spec in ('m4a', 'mp3', 'ogg', 'aac'): # audio extension
1969 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
1970 elif format_spec in ('mp4', 'flv', 'webm', '3gp'): # video extension
1971 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
1972 elif format_spec in ('mhtml', ): # storyboards extension
1973 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
1974 else:
1975 filter_f = (lambda f: f.get('format_id') == format_spec) # id
909d24dd 1976
1977 def selector_function(ctx):
1978 formats = list(ctx['formats'])
909d24dd 1979 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1980 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1981 # for extractors with incomplete formats (audio only (soundcloud)
1982 # or video only (imgur)) best/worst will fall back to
1983 # best/worst {video,audio}-only format
e8e73840 1984 matches = formats
981052c9 1985 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1986 try:
e8e73840 1987 yield matches[format_idx - 1]
981052c9 1988 except IndexError:
1989 return
083c9df9 1990
67134eab 1991 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1992
317f7ab6
S
1993 def final_selector(ctx):
1994 ctx_copy = copy.deepcopy(ctx)
67134eab 1995 for _filter in filters:
317f7ab6
S
1996 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1997 return selector_function(ctx_copy)
67134eab 1998 return final_selector
083c9df9 1999
67134eab 2000 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2001 try:
232541df 2002 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2003 except tokenize.TokenError:
2004 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2005
2006 class TokenIterator(object):
2007 def __init__(self, tokens):
2008 self.tokens = tokens
2009 self.counter = 0
2010
2011 def __iter__(self):
2012 return self
2013
2014 def __next__(self):
2015 if self.counter >= len(self.tokens):
2016 raise StopIteration()
2017 value = self.tokens[self.counter]
2018 self.counter += 1
2019 return value
2020
2021 next = __next__
2022
2023 def restore_last_token(self):
2024 self.counter -= 1
2025
2026 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2027 return _build_selector_function(parsed_selector)
a9c58ad9 2028
e5660ee6
JMF
2029 def _calc_headers(self, info_dict):
2030 res = std_headers.copy()
2031
2032 add_headers = info_dict.get('http_headers')
2033 if add_headers:
2034 res.update(add_headers)
2035
2036 cookies = self._calc_cookies(info_dict)
2037 if cookies:
2038 res['Cookie'] = cookies
2039
0016b84e
S
2040 if 'X-Forwarded-For' not in res:
2041 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2042 if x_forwarded_for_ip:
2043 res['X-Forwarded-For'] = x_forwarded_for_ip
2044
e5660ee6
JMF
2045 return res
2046
2047 def _calc_cookies(self, info_dict):
5c2266df 2048 pr = sanitized_Request(info_dict['url'])
e5660ee6 2049 self.cookiejar.add_cookie_header(pr)
662435f7 2050 return pr.get_header('Cookie')
e5660ee6 2051
b0249bca 2052 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2053 thumbnails = info_dict.get('thumbnails')
2054 if thumbnails is None:
2055 thumbnail = info_dict.get('thumbnail')
2056 if thumbnail:
2057 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2058 if thumbnails:
2059 thumbnails.sort(key=lambda t: (
2060 t.get('preference') if t.get('preference') is not None else -1,
2061 t.get('width') if t.get('width') is not None else -1,
2062 t.get('height') if t.get('height') is not None else -1,
2063 t.get('id') if t.get('id') is not None else '',
2064 t.get('url')))
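# Thumbnails end up sorted worst-to-best (unknown preference/size sorts first),
# so later code can pick thumbnails[-1] as the preferred one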
b0249bca 2065
0ba692ac 2066 def thumbnail_tester():
2067 if self.params.get('check_formats'):
cca80fe6 2068 test_all = True
2069 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 2070 else:
cca80fe6 2071 test_all = False
0ba692ac 2072 to_screen = self.write_debug
2073
2074 def test_thumbnail(t):
cca80fe6 2075 if not test_all and not t.get('_test_url'):
2076 return True
0ba692ac 2077 to_screen('Testing thumbnail %s' % t['id'])
2078 try:
2079 self.urlopen(HEADRequest(t['url']))
2080 except network_exceptions as err:
2081 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2082 t['id'], t['url'], error_to_compat_str(err)))
2083 return False
2084 return True
2085
2086 return test_thumbnail
b0249bca 2087
bc516a3f 2088 for i, t in enumerate(thumbnails):
bc516a3f 2089 if t.get('id') is None:
2090 t['id'] = '%d' % i
b0249bca 2091 if t.get('width') and t.get('height'):
2092 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2093 t['url'] = sanitize_url(t['url'])
0ba692ac 2094
2095 if self.params.get('check_formats') is not False:
2096 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2097 else:
2098 info_dict['thumbnails'] = thumbnails
bc516a3f 2099
dd82ffea
JMF
2100 def process_video_result(self, info_dict, download=True):
2101 assert info_dict.get('_type', 'video') == 'video'
2102
bec1fad2
PH
2103 if 'id' not in info_dict:
2104 raise ExtractorError('Missing "id" field in extractor result')
2105 if 'title' not in info_dict:
1151c407 2106 raise ExtractorError('Missing "title" field in extractor result',
2107 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2108
c9969434
S
2109 def report_force_conversion(field, field_not, conversion):
2110 self.report_warning(
2111 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2112 % (field, field_not, conversion))
2113
2114 def sanitize_string_field(info, string_field):
2115 field = info.get(string_field)
2116 if field is None or isinstance(field, compat_str):
2117 return
2118 report_force_conversion(string_field, 'a string', 'string')
2119 info[string_field] = compat_str(field)
2120
2121 def sanitize_numeric_fields(info):
2122 for numeric_field in self._NUMERIC_FIELDS:
2123 field = info.get(numeric_field)
2124 if field is None or isinstance(field, compat_numeric_types):
2125 continue
2126 report_force_conversion(numeric_field, 'numeric', 'int')
2127 info[numeric_field] = int_or_none(field)
2128
2129 sanitize_string_field(info_dict, 'id')
2130 sanitize_numeric_fields(info_dict)
be6217b2 2131
dd82ffea
JMF
2132 if 'playlist' not in info_dict:
2133 # It isn't part of a playlist
2134 info_dict['playlist'] = None
2135 info_dict['playlist_index'] = None
2136
bc516a3f 2137 self._sanitize_thumbnails(info_dict)
d5519808 2138
536a55da 2139 thumbnail = info_dict.get('thumbnail')
bc516a3f 2140 thumbnails = info_dict.get('thumbnails')
536a55da
S
2141 if thumbnail:
2142 info_dict['thumbnail'] = sanitize_url(thumbnail)
2143 elif thumbnails:
d5519808
PH
2144 info_dict['thumbnail'] = thumbnails[-1]['url']
2145
ae30b840 2146 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2147 info_dict['display_id'] = info_dict['id']
2148
10db0d2f 2149 for ts_key, date_key in (
2150 ('timestamp', 'upload_date'),
2151 ('release_timestamp', 'release_date'),
2152 ):
2153 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2154 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2155 # see http://bugs.python.org/issue1646728)
2156 try:
2157 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2158 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2159 except (ValueError, OverflowError, OSError):
2160 pass
9d2ecdbc 2161
ae30b840 2162 live_keys = ('is_live', 'was_live')
2163 live_status = info_dict.get('live_status')
2164 if live_status is None:
2165 for key in live_keys:
2166 if info_dict.get(key) is False:
2167 continue
2168 if info_dict.get(key):
2169 live_status = key
2170 break
2171 if all(info_dict.get(key) is False for key in live_keys):
2172 live_status = 'not_live'
2173 if live_status:
2174 info_dict['live_status'] = live_status
2175 for key in live_keys:
2176 if info_dict.get(key) is None:
2177 info_dict[key] = (live_status == key)
2178
33d2fc2f
S
2179 # Auto generate title fields corresponding to the *_number fields when missing
2180 # in order to always have clean titles. This is very common for TV series.
2181 for field in ('chapter', 'season', 'episode'):
2182 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2183 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
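# e.g. episode_number=3 with no episode title becomes 'Episode 3'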
2184
05108a49
S
2185 for cc_kind in ('subtitles', 'automatic_captions'):
2186 cc = info_dict.get(cc_kind)
2187 if cc:
2188 for _, subtitle in cc.items():
2189 for subtitle_format in subtitle:
2190 if subtitle_format.get('url'):
2191 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2192 if subtitle_format.get('ext') is None:
2193 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2194
2195 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2196 subtitles = info_dict.get('subtitles')
4bba3716 2197
360e1ca5 2198 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2199 info_dict['id'], subtitles, automatic_captions)
a504ced0 2200
dd82ffea
JMF
2201 # We now pick which formats have to be downloaded
2202 if info_dict.get('formats') is None:
2203 # There's only one format available
2204 formats = [info_dict]
2205 else:
2206 formats = info_dict['formats']
2207
e0493e90 2208 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2209 if not self.params.get('allow_unplayable_formats'):
2210 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2211
db95dc13 2212 if not formats:
1151c407 2213 self.raise_no_formats(info_dict)
db95dc13 2214
73af5cc8
S
2215 def is_wellformed(f):
2216 url = f.get('url')
a5ac0c47 2217 if not url:
73af5cc8
S
2218 self.report_warning(
2219 '"url" field is missing or empty - skipping format, '
2220 'there is an error in extractor')
a5ac0c47
S
2221 return False
2222 if isinstance(url, bytes):
2223 sanitize_string_field(f, 'url')
2224 return True
73af5cc8
S
2225
2226 # Filter out malformed formats for better extraction robustness
2227 formats = list(filter(is_wellformed, formats))
2228
181c7053
S
2229 formats_dict = {}
2230
dd82ffea 2231 # We check that all the formats have the format and format_id fields
db95dc13 2232 for i, format in enumerate(formats):
c9969434
S
2233 sanitize_string_field(format, 'format_id')
2234 sanitize_numeric_fields(format)
dcf77cf1 2235 format['url'] = sanitize_url(format['url'])
e74e3b63 2236 if not format.get('format_id'):
8016c922 2237 format['format_id'] = compat_str(i)
e2effb08
S
2238 else:
2239 # Sanitize format_id from characters used in format selector expression
ec85ded8 2240 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2241 format_id = format['format_id']
2242 if format_id not in formats_dict:
2243 formats_dict[format_id] = []
2244 formats_dict[format_id].append(format)
2245
2246 # Make sure all formats have unique format_id
2247 for format_id, ambiguous_formats in formats_dict.items():
2248 if len(ambiguous_formats) > 1:
2249 for i, format in enumerate(ambiguous_formats):
2250 format['format_id'] = '%s-%d' % (format_id, i)
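# e.g. two formats that both report format_id 'hls' become 'hls-0' and 'hls-1'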
2251
2252 for i, format in enumerate(formats):
8c51aa65 2253 if format.get('format') is None:
6febd1c1 2254 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2255 id=format['format_id'],
2256 res=self.format_resolution(format),
b868936c 2257 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2258 )
c1002e96 2259 # Automatically determine file extension if missing
5b1d8575 2260 if format.get('ext') is None:
cce929ea 2261 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2262 # Automatically determine protocol if missing (useful for format
2263 # selection purposes)
6f0be937 2264 if format.get('protocol') is None:
b5559424 2265 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2266 # Add HTTP headers, so that external programs can use them from the
2267 # json output
2268 full_format_info = info_dict.copy()
2269 full_format_info.update(format)
2270 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2271 # Remove private housekeeping stuff
2272 if '__x_forwarded_for_ip' in info_dict:
2273 del info_dict['__x_forwarded_for_ip']
dd82ffea 2274
4bcc7bd1 2275 # TODO Central sorting goes here
99e206d5 2276
88acdbc2 2277 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2278 # only set the 'formats' field if the original info_dict lists them
2279 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2280 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2281 # which can't be exported to json
b3d9ef88 2282 info_dict['formats'] = formats
4ec82a72 2283
2284 info_dict, _ = self.pre_process(info_dict)
2285
b7b04c78 2286 if self.params.get('list_thumbnails'):
2287 self.list_thumbnails(info_dict)
2288 if self.params.get('listformats'):
86c66b2d 2289 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2290 self.to_screen('%s has no formats' % info_dict['id'])
2291 else:
2292 self.list_formats(info_dict)
b7b04c78 2293 if self.params.get('listsubtitles'):
2294 if 'automatic_captions' in info_dict:
2295 self.list_subtitles(
2296 info_dict['id'], automatic_captions, 'automatic captions')
2297 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2298 list_only = self.params.get('simulate') is None and (
2299 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2300 if list_only:
b7b04c78 2301 # Without this printing, -F --print-json will not work
169dbde9 2302 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2303 return
2304
187986a8 2305 format_selector = self.format_selector
2306 if format_selector is None:
0017d9ad 2307 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2308 self.write_debug('Default format spec: %s' % req_format)
187986a8 2309 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2310
2311 # While in format selection we may need to have access to the original
2312 # format set in order to calculate some metrics or do some processing.
2313 # For now we need to be able to guess whether original formats provided
2314 # by extractor are incomplete or not (i.e. whether extractor provides only
2315 # video-only or audio-only formats) for proper formats selection for
2316 # extractors with such incomplete formats (see
067aa17e 2317 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2318 # Since formats may be filtered during format selection and may not match
2319 # the original formats, the results may be incorrect. Thus original formats
2320 # or pre-calculated metrics should be passed to format selection routines
2321 # as well.
2322 # We will pass a context object containing all necessary additional data
2323 # instead of just formats.
2324 # This fixes incorrect format selection issue (see
067aa17e 2325 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2326 incomplete_formats = (
317f7ab6 2327 # All formats are video-only or
3089bc74 2328 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2329 # all formats are audio-only
3089bc74 2330 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2331
2332 ctx = {
2333 'formats': formats,
2334 'incomplete_formats': incomplete_formats,
2335 }
2336
2337 formats_to_download = list(format_selector(ctx))
dd82ffea 2338 if not formats_to_download:
b7da73eb 2339 if not self.params.get('ignore_no_formats_error'):
1151c407 2340 raise ExtractorError('Requested format is not available', expected=True,
2341 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2342 else:
2343 self.report_warning('Requested format is not available')
4513a41a
A
2344 # Process what we can, even without any available formats.
2345 self.process_info(dict(info_dict))
b7da73eb 2346 elif download:
2347 self.to_screen(
07cce701 2348 '[info] %s: Downloading %d format(s): %s' % (
2349 info_dict['id'], len(formats_to_download),
2350 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2351 for fmt in formats_to_download:
dd82ffea 2352 new_info = dict(info_dict)
4ec82a72 2353 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2354 new_info['__original_infodict'] = info_dict
b7da73eb 2355 new_info.update(fmt)
dd82ffea
JMF
2356 self.process_info(new_info)
2357 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2358 if formats_to_download:
2359 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2360 return info_dict
2361
98c70d6f 2362 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2363 """Select the requested subtitles and their format"""
98c70d6f
JMF
2364 available_subs = {}
2365 if normal_subtitles and self.params.get('writesubtitles'):
2366 available_subs.update(normal_subtitles)
2367 if automatic_captions and self.params.get('writeautomaticsub'):
2368 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2369 if lang not in available_subs:
2370 available_subs[lang] = cap_info
2371
4d171848
JMF
2372 if (not self.params.get('writesubtitles') and not
2373 self.params.get('writeautomaticsub') or not
2374 available_subs):
2375 return None
a504ced0 2376
c32b0aab 2377 all_sub_langs = available_subs.keys()
a504ced0 2378 if self.params.get('allsubtitles', False):
c32b0aab 2379 requested_langs = all_sub_langs
2380 elif self.params.get('subtitleslangs', False):
77c4a9ef 2381 # A list is used so that the order of languages will be the same as
2382 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2383 requested_langs = []
2384 for lang_re in self.params.get('subtitleslangs'):
2385 if lang_re == 'all':
2386 requested_langs.extend(all_sub_langs)
c32b0aab 2387 continue
77c4a9ef 2388 discard = lang_re[0] == '-'
c32b0aab 2389 if discard:
77c4a9ef 2390 lang_re = lang_re[1:]
2391 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2392 if discard:
2393 for lang in current_langs:
77c4a9ef 2394 while lang in requested_langs:
2395 requested_langs.remove(lang)
c32b0aab 2396 else:
77c4a9ef 2397 requested_langs.extend(current_langs)
2398 requested_langs = orderedSet(requested_langs)
c32b0aab 2399 elif 'en' in available_subs:
2400 requested_langs = ['en']
a504ced0 2401 else:
c32b0aab 2402 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2403 if requested_langs:
2404 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2405
2406 formats_query = self.params.get('subtitlesformat', 'best')
2407 formats_preference = formats_query.split('/') if formats_query else []
2408 subs = {}
2409 for lang in requested_langs:
2410 formats = available_subs.get(lang)
2411 if formats is None:
2412 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2413 continue
a504ced0
JMF
2414 for ext in formats_preference:
2415 if ext == 'best':
2416 f = formats[-1]
2417 break
2418 matches = list(filter(lambda f: f['ext'] == ext, formats))
2419 if matches:
2420 f = matches[-1]
2421 break
2422 else:
2423 f = formats[-1]
2424 self.report_warning(
2425 'No subtitle format found matching "%s" for language %s, '
2426 'using %s' % (formats_query, lang, f['ext']))
2427 subs[lang] = f
2428 return subs
2429
d06daf23 2430 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2431 def print_mandatory(field, actual_field=None):
2432 if actual_field is None:
2433 actual_field = field
d06daf23 2434 if (self.params.get('force%s' % field, False)
53c18592 2435 and (not incomplete or info_dict.get(actual_field) is not None)):
2436 self.to_stdout(info_dict[actual_field])
d06daf23
S
2437
2438 def print_optional(field):
2439 if (self.params.get('force%s' % field, False)
2440 and info_dict.get(field) is not None):
2441 self.to_stdout(info_dict[field])
2442
53c18592 2443 info_dict = info_dict.copy()
2444 if filename is not None:
2445 info_dict['filename'] = filename
2446 if info_dict.get('requested_formats') is not None:
2447 # For RTMP URLs, also include the playpath
2448 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2449 elif 'url' in info_dict:
2450 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2451
2b8a2973 2452 if self.params.get('forceprint') or self.params.get('forcejson'):
2453 self.post_extract(info_dict)
53c18592 2454 for tmpl in self.params.get('forceprint', []):
819e0531 2455 self.to_stdout(self.evaluate_outtmpl(
2456 f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict))
53c18592 2457
d06daf23
S
2458 print_mandatory('title')
2459 print_mandatory('id')
53c18592 2460 print_mandatory('url', 'urls')
d06daf23
S
2461 print_optional('thumbnail')
2462 print_optional('description')
53c18592 2463 print_optional('filename')
b868936c 2464 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2465 self.to_stdout(formatSeconds(info_dict['duration']))
2466 print_mandatory('format')
53c18592 2467
2b8a2973 2468 if self.params.get('forcejson'):
6e84b215 2469 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2470
e8e73840 2471 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2472 if not info.get('url'):
1151c407 2473 self.raise_no_formats(info, True)
e8e73840 2474
2475 if test:
2476 verbose = self.params.get('verbose')
2477 params = {
2478 'test': True,
2479 'quiet': not verbose,
2480 'verbose': verbose,
2481 'noprogress': not verbose,
2482 'nopart': True,
2483 'skip_unavailable_fragments': False,
2484 'keep_fragments': False,
2485 'overwrites': True,
2486 '_no_ytdl_file': True,
2487 }
2488 else:
2489 params = self.params
96fccc10 2490 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2491 if not test:
2492 for ph in self._progress_hooks:
2493 fd.add_progress_hook(ph)
18e674b4 2494 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2495 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2496 new_info = dict(info)
2497 if new_info.get('http_headers') is None:
2498 new_info['http_headers'] = self._calc_headers(new_info)
2499 return fd.download(name, new_info, subtitle)
2500
8222d8de
JMF
2501 def process_info(self, info_dict):
2502 """Process a single resolved IE result."""
2503
2504 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
2505
2506 max_downloads = self.params.get('max_downloads')
2507 if max_downloads is not None:
2508 if self._num_downloads >= int(max_downloads):
2509 raise MaxDownloadsReached()
8222d8de 2510
d06daf23 2511 # TODO: backward compatibility, to be removed
8222d8de 2512 info_dict['fulltitle'] = info_dict['title']
8222d8de 2513
4513a41a 2514 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2515 info_dict['format'] = info_dict['ext']
2516
c77495e3 2517 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2518 return
2519
277d6ff5 2520 self.post_extract(info_dict)
fd288278 2521 self._num_downloads += 1
8222d8de 2522
dcf64d43 2523 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2524 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2525 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2526 files_to_move = {}
8222d8de
JMF
2527
2528 # Forced printings
4513a41a 2529 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2530
b7b04c78 2531 if self.params.get('simulate'):
2d30509f 2532 if self.params.get('force_write_download_archive', False):
2533 self.record_download_archive(info_dict)
2d30509f 2534 # Do nothing else if in simulate mode
8222d8de
JMF
2535 return
2536
de6000d9 2537 if full_filename is None:
8222d8de 2538 return
e92caff5 2539 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2540 return
e92caff5 2541 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2542 return
2543
80c03fa9 2544 if self._write_description('video', info_dict,
2545 self.prepare_filename(info_dict, 'description')) is None:
2546 return
2547
2548 sub_files = self._write_subtitles(info_dict, temp_filename)
2549 if sub_files is None:
2550 return
2551 files_to_move.update(dict(sub_files))
2552
2553 thumb_files = self._write_thumbnails(
2554 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2555 if thumb_files is None:
2556 return
2557 files_to_move.update(dict(thumb_files))
8222d8de 2558
80c03fa9 2559 infofn = self.prepare_filename(info_dict, 'infojson')
2560 _infojson_written = self._write_info_json('video', info_dict, infofn)
2561 if _infojson_written:
2562 info_dict['__infojson_filename'] = infofn
2563 elif _infojson_written is None:
2564 return
2565
2566 # Note: Annotations are deprecated
2567 annofn = None
1fb07d10 2568 if self.params.get('writeannotations', False):
de6000d9 2569 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2570 if annofn:
e92caff5 2571 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2572 return
0c3d0f51 2573 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2574 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2575 elif not info_dict.get('annotations'):
2576 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2577 else:
2578 try:
6febd1c1 2579 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2580 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2581 annofile.write(info_dict['annotations'])
2582 except (KeyError, TypeError):
6febd1c1 2583 self.report_warning('There are no annotations to write.')
7b6fefc9 2584 except (OSError, IOError):
6febd1c1 2585 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2586 return
1fb07d10 2587
732044af 2588 # Write internet shortcut files
2589 url_link = webloc_link = desktop_link = False
2590 if self.params.get('writelink', False):
2591 if sys.platform == "darwin": # macOS.
2592 webloc_link = True
2593 elif sys.platform.startswith("linux"):
2594 desktop_link = True
2595 else: # if sys.platform in ['win32', 'cygwin']:
2596 url_link = True
2597 if self.params.get('writeurllink', False):
2598 url_link = True
2599 if self.params.get('writewebloclink', False):
2600 webloc_link = True
2601 if self.params.get('writedesktoplink', False):
2602 desktop_link = True
2603
2604 if url_link or webloc_link or desktop_link:
2605 if 'webpage_url' not in info_dict:
2606 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2607 return
2608 ascii_url = iri_to_uri(info_dict['webpage_url'])
2609
2610 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2611 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2612 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2613 self.to_screen('[info] Internet shortcut is already present')
2614 else:
2615 try:
2616 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2617 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2618 template_vars = {'url': ascii_url}
2619 if embed_filename:
2620 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2621 linkfile.write(template % template_vars)
2622 except (OSError, IOError):
2623 self.report_error('Cannot write internet shortcut ' + linkfn)
2624 return False
2625 return True
2626
2627 if url_link:
2628 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2629 return
2630 if webloc_link:
2631 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2632 return
2633 if desktop_link:
2634 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2635 return
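# Platform defaults for the 'writelink' option above: macOS gets a .webloc file,
# Linux a .desktop file, and anything else (e.g. Windows/Cygwin) a .url file; the
# 'writeurllink'/'writewebloclink'/'writedesktoplink' options each force one
# specific type regardless of platform.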
2636
56d868db 2637 try:
2638 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2639 except PostProcessingError as err:
2640 self.report_error('Preprocessing: %s' % str(err))
2641 return
2642
732044af 2643 must_record_download_archive = False
56d868db 2644 if self.params.get('skip_download', False):
2645 info_dict['filepath'] = temp_filename
2646 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2647 info_dict['__files_to_move'] = files_to_move
2648 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2649 else:
2650 # Download
b868936c 2651 info_dict.setdefault('__postprocessors', [])
4340deca 2652 try:
0202b52a 2653
6b591b29 2654 def existing_file(*filepaths):
2655 ext = info_dict.get('ext')
2656 final_ext = self.params.get('final_ext', ext)
2657 existing_files = []
2658 for file in orderedSet(filepaths):
2659 if final_ext != ext:
2660 converted = replace_extension(file, final_ext, ext)
2661 if os.path.exists(encodeFilename(converted)):
2662 existing_files.append(converted)
2663 if os.path.exists(encodeFilename(file)):
2664 existing_files.append(file)
2665
2666 if not existing_files or self.params.get('overwrites', False):
2667 for file in orderedSet(existing_files):
2668 self.report_file_delete(file)
2669 os.remove(encodeFilename(file))
2670 return None
2671
6b591b29 2672 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2673 return existing_files[0]
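# existing_file() above lets an already-present file be reused instead of
# re-downloaded: e.g. if ext is 'webm' but final_ext is 'mp4' (a remux was
# requested), an existing 'clip.mp4' is picked up and info_dict['ext'] adjusted
# to match; when overwriting is enabled, any such files are deleted and None is
# returned so the download proceeds normally.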
0202b52a 2674
2675 success = True
4340deca 2676 if info_dict.get('requested_formats') is not None:
81cd954a 2677
2678 def compatible_formats(formats):
d03cfdce 2679 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2680 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2681 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2682 if len(video_formats) > 2 or len(audio_formats) > 2:
2683 return False
2684
81cd954a 2685 # Check extension
d03cfdce 2686 exts = set(format.get('ext') for format in formats)
2687 COMPATIBLE_EXTS = (
2688 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2689 set(('webm',)),
2690 )
2691 for ext_sets in COMPATIBLE_EXTS:
2692 if ext_sets.issuperset(exts):
2693 return True
81cd954a 2694 # TODO: Check acodec/vcodec
2695 return False
2696
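# The compatible_formats() check above decides whether the requested formats can
# be merged into their own container: e.g. an mp4 video-only plus an m4a
# audio-only format stays within the mp4 family, while mp4 video plus webm audio
# fails the check and (absent merge_output_format) is merged into mkv below.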
2697 requested_formats = info_dict['requested_formats']
0202b52a 2698 old_ext = info_dict['ext']
3b297919 2699 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2700 info_dict['ext'] = 'mkv'
2701 self.report_warning(
2702 'Requested formats are incompatible for merge and will be merged into mkv.')
124bc071 2703 new_ext = info_dict['ext']
0202b52a 2704
124bc071 2705 def correct_ext(filename, ext=new_ext):
96fccc10 2706 if filename == '-':
2707 return filename
0202b52a 2708 filename_real_ext = os.path.splitext(filename)[1][1:]
2709 filename_wo_ext = (
2710 os.path.splitext(filename)[0]
124bc071 2711 if filename_real_ext in (old_ext, new_ext)
0202b52a 2712 else filename)
124bc071 2713 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2714
38c6902b 2715 # Ensure filename always has a correct extension for successful merge
0202b52a 2716 full_filename = correct_ext(full_filename)
2717 temp_filename = correct_ext(temp_filename)
2718 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2719 info_dict['__real_download'] = False
18e674b4 2720
2721 _protocols = set(determine_protocol(f) for f in requested_formats)
dbf5416a 2722 if len(_protocols) == 1: # All requested formats have same protocol
18e674b4 2723 info_dict['protocol'] = _protocols.pop()
d5fe04f5 2724 directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
dbf5416a 2725 if dl_filename is not None:
6c7274ec 2726 self.report_file_already_downloaded(dl_filename)
96fccc10 2727 elif (directly_mergable and get_suitable_downloader(
a46a815b 2728 info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
dbf5416a 2729 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2730 success, real_download = self.dl(temp_filename, info_dict)
2731 info_dict['__real_download'] = real_download
18e674b4 2732 else:
2733 downloaded = []
2734 merger = FFmpegMergerPP(self)
2735 if self.params.get('allow_unplayable_formats'):
2736 self.report_warning(
2737 'You have requested merging of multiple formats '
2738 'while also allowing unplayable formats to be downloaded. '
2739 'The formats won\'t be merged to prevent data corruption.')
2740 elif not merger.available:
2741 self.report_warning(
2742 'You have requested merging of multiple formats but ffmpeg is not installed. '
2743 'The formats won\'t be merged.')
2744
96fccc10 2745 if temp_filename == '-':
2746 reason = ('using a downloader other than ffmpeg' if directly_mergable
2747 else 'but the formats are incompatible for simultaneous download' if merger.available
2748 else 'but ffmpeg is not installed')
2749 self.report_warning(
2750 f'You have requested downloading multiple formats to stdout {reason}. '
2751 'The formats will be streamed one after the other')
2752 fname = temp_filename
dbf5416a 2753 for f in requested_formats:
2754 new_info = dict(info_dict)
2755 del new_info['requested_formats']
2756 new_info.update(f)
96fccc10 2757 if temp_filename != '-':
124bc071 2758 fname = prepend_extension(
2759 correct_ext(temp_filename, new_info['ext']),
2760 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2761 if not self._ensure_dir_exists(fname):
2762 return
a21e0ab1 2763 f['filepath'] = fname
96fccc10 2764 downloaded.append(fname)
dbf5416a 2765 partial_success, real_download = self.dl(fname, new_info)
2766 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2767 success = success and partial_success
2768 if merger.available and not self.params.get('allow_unplayable_formats'):
2769 info_dict['__postprocessors'].append(merger)
2770 info_dict['__files_to_merge'] = downloaded
2771 # Even if nothing new was downloaded, the merged file is only produced now
2772 info_dict['__real_download'] = True
2773 else:
2774 for file in downloaded:
2775 files_to_move[file] = None
4340deca 2776 else:
2777 # Just a single file
0202b52a 2778 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2779 if dl_filename is None or dl_filename == temp_filename:
2780 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2781 # So we should try to resume the download
e8e73840 2782 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2783 info_dict['__real_download'] = real_download
6c7274ec 2784 else:
2785 self.report_file_already_downloaded(dl_filename)
0202b52a 2786
0202b52a 2787 dl_filename = dl_filename or temp_filename
c571435f 2788 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2789
3158150c 2790 except network_exceptions as err:
7960b056 2791 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 2792 return
2793 except (OSError, IOError) as err:
2794 raise UnavailableVideoError(err)
2795 except (ContentTooShortError, ) as err:
2796 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2797 return
8222d8de 2798
de6000d9 2799 if success and full_filename != '-':
f17f8651 2800
fd7cfb64 2801 def fixup():
2802 do_fixup = True
2803 fixup_policy = self.params.get('fixup')
2804 vid = info_dict['id']
2805
2806 if fixup_policy in ('ignore', 'never'):
2807 return
2808 elif fixup_policy == 'warn':
2809 do_fixup = False
f89b3e2d 2810 elif fixup_policy != 'force':
2811 assert fixup_policy in ('detect_or_warn', None)
2812 if not info_dict.get('__real_download'):
2813 do_fixup = False
fd7cfb64 2814
2815 def ffmpeg_fixup(cndn, msg, cls):
2816 if not cndn:
2817 return
2818 if not do_fixup:
2819 self.report_warning(f'{vid}: {msg}')
2820 return
2821 pp = cls(self)
2822 if pp.available:
2823 info_dict['__postprocessors'].append(pp)
2824 else:
2825 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2826
2827 stretched_ratio = info_dict.get('stretched_ratio')
2828 ffmpeg_fixup(
2829 stretched_ratio not in (1, None),
2830 f'Non-uniform pixel ratio {stretched_ratio}',
2831 FFmpegFixupStretchedPP)
2832
2833 ffmpeg_fixup(
2834 (info_dict.get('requested_formats') is None
2835 and info_dict.get('container') == 'm4a_dash'
2836 and info_dict.get('ext') == 'm4a'),
2837 'writing DASH m4a. Only some players support this container',
2838 FFmpegFixupM4aPP)
2839
2840 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2841 if 'protocol' in info_dict else None)
84726743 2842 ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2843 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2844 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2845 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2846
2847 fixup()
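# fixup() above, by policy: 'never'/'ignore' skip all checks, 'warn' only reports,
# 'force' always queues the ffmpeg fixup postprocessors, and the default
# 'detect_or_warn' queues them only when the file was actually downloaded in this
# run (info_dict['__real_download']); otherwise it just warns.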
8222d8de 2848 try:
23c1a667 2849 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2850 except PostProcessingError as err:
2851 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2852 return
ab8e5e51 2853 try:
2854 for ph in self._post_hooks:
23c1a667 2855 ph(info_dict['filepath'])
ab8e5e51 2856 except Exception as err:
2857 self.report_error('post hooks: %s' % str(err))
2858 return
2d30509f 2859 must_record_download_archive = True
2860
2861 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2862 self.record_download_archive(info_dict)
c3e6ffba 2863 max_downloads = self.params.get('max_downloads')
2864 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2865 raise MaxDownloadsReached()
8222d8de 2866
2867 def download(self, url_list):
2868 """Download a given list of URLs."""
de6000d9 2869 outtmpl = self.outtmpl_dict['default']
3089bc74 2870 if (len(url_list) > 1
2871 and outtmpl != '-'
2872 and '%' not in outtmpl
2873 and self.params.get('max_downloads') != 1):
acd69589 2874 raise SameFileError(outtmpl)
8222d8de 2875
2876 for url in url_list:
2877 try:
5f6a1245 2878 # It also downloads the videos
61aa5ba3 2879 res = self.extract_info(
2880 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2881 except UnavailableVideoError:
6febd1c1 2882 self.report_error('unable to download video')
8222d8de 2883 except MaxDownloadsReached:
8f18aca8 2884 self.to_screen('[info] Maximum number of downloads reached')
8b0d7497 2885 raise
2886 except ExistingVideoReached:
8f18aca8 2887 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2888 raise
2889 except RejectedVideoReached:
8f18aca8 2890 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
8222d8de 2891 raise
63e0be34 2892 else:
2893 if self.params.get('dump_single_json', False):
277d6ff5 2894 self.post_extract(res)
6e84b215 2895 self.to_stdout(json.dumps(self.sanitize_info(res)))
8222d8de 2896
2897 return self._download_retcode
2898
1dcc4c0c 2899 def download_with_info_file(self, info_filename):
31bd3925 2900 with contextlib.closing(fileinput.FileInput(
2901 [info_filename], mode='r',
2902 openhook=fileinput.hook_encoded('utf-8'))) as f:
2903 # FileInput doesn't have a read method, so we can't call json.load
8012d892 2904 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 2905 try:
2906 self.process_ie_result(info, download=True)
d3f62c19 2907 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898 2908 webpage_url = info.get('webpage_url')
2909 if webpage_url is not None:
6febd1c1 2910 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898 2911 return self.download([webpage_url])
2912 else:
2913 raise
2914 return self._download_retcode
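# Assumed usage sketch (illustrative only): re-run processing from a previously
# written info JSON file, e.g.
#   with YoutubeDL(params) as ydl:
#       retcode = ydl.download_with_info_file('clip.info.json')
# On DownloadError and similar failures, extraction is retried from the stored
# 'webpage_url' when available.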
1dcc4c0c 2915
cb202fd2 2916 @staticmethod
8012d892 2917 def sanitize_info(info_dict, remove_private_keys=False):
2918 ''' Sanitize the infodict for converting to json '''
3ad56b42 2919 if info_dict is None:
2920 return info_dict
6e84b215 2921 info_dict.setdefault('epoch', int(time.time()))
2922 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 2923 keep_keys = {'_type'} # Always keep this to facilitate load-info-json
8012d892 2924 if remove_private_keys:
6e84b215 2925 remove_keys |= {
2926 'requested_formats', 'requested_subtitles', 'requested_entries',
2927 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2928 }
ae8f99e6 2929 empty_values = (None, {}, [], set(), tuple())
2930 reject = lambda k, v: k not in keep_keys and (
2931 k.startswith('_') or k in remove_keys or v in empty_values)
2932 else:
ae8f99e6 2933 reject = lambda k, v: k in remove_keys
5226731e 2934 filter_fn = lambda obj: (
b0249bca 2935 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2936 else obj if not isinstance(obj, dict)
ae8f99e6 2937 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2938 return filter_fn(info_dict)
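# With the default remove_private_keys=False, only remove_keys
# ('__original_infodict') is dropped; passing True additionally strips keys
# starting with '_', empty values, and internal fields such as
# 'requested_formats' or 'filepath'. Callers pass params['clean_infojson'] here
# when writing or re-loading the info JSON.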
cb202fd2 2939
8012d892 2940 @staticmethod
2941 def filter_requested_info(info_dict, actually_filter=True):
2942 ''' Alias of sanitize_info for backward compatibility '''
2943 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2944
dcf64d43 2945 def run_pp(self, pp, infodict):
5bfa4862 2946 files_to_delete = []
dcf64d43 2947 if '__files_to_move' not in infodict:
2948 infodict['__files_to_move'] = {}
b1940459 2949 try:
2950 files_to_delete, infodict = pp.run(infodict)
2951 except PostProcessingError as e:
2952 # Must be True and not 'only_download'
2953 if self.params.get('ignoreerrors') is True:
2954 self.report_error(e)
2955 return infodict
2956 raise
2957
5bfa4862 2958 if not files_to_delete:
dcf64d43 2959 return infodict
5bfa4862 2960 if self.params.get('keepvideo', False):
2961 for f in files_to_delete:
dcf64d43 2962 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2963 else:
2964 for old_filename in set(files_to_delete):
2965 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2966 try:
2967 os.remove(encodeFilename(old_filename))
2968 except (IOError, OSError):
2969 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2970 if old_filename in infodict['__files_to_move']:
2971 del infodict['__files_to_move'][old_filename]
2972 return infodict
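# Note on the error handling above: only ignoreerrors=True (not 'only_download')
# swallows a PostProcessingError here; with -k/keepvideo, files a postprocessor
# marks for deletion are kept and queued for moving instead of being removed.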
5bfa4862 2973
277d6ff5 2974 @staticmethod
2975 def post_extract(info_dict):
2976 def actual_post_extract(info_dict):
2977 if info_dict.get('_type') in ('playlist', 'multi_video'):
2978 for video_dict in info_dict.get('entries', {}):
b050d210 2979 actual_post_extract(video_dict or {})
277d6ff5 2980 return
2981
07cce701 2982 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2983 extra = post_extractor().items()
2984 info_dict.update(extra)
07cce701 2985 info_dict.pop('__post_extractor', None)
277d6ff5 2986
4ec82a72 2987 original_infodict = info_dict.get('__original_infodict') or {}
2988 original_infodict.update(extra)
2989 original_infodict.pop('__post_extractor', None)
2990
b050d210 2991 actual_post_extract(info_dict or {})
277d6ff5 2992
56d868db 2993 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 2994 info = dict(ie_info)
56d868db 2995 info['__files_to_move'] = files_to_move or {}
2996 for pp in self._pps[key]:
dcf64d43 2997 info = self.run_pp(pp, info)
56d868db 2998 return info, info.pop('__files_to_move', None)
5bfa4862 2999
dcf64d43 3000 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de 3001 """Run all the postprocessors on the given file."""
3002 info = dict(ie_info)
3003 info['filepath'] = filename
dcf64d43 3004 info['__files_to_move'] = files_to_move or {}
0202b52a 3005
56d868db 3006 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3007 info = self.run_pp(pp, info)
3008 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3009 del info['__files_to_move']
56d868db 3010 for pp in self._pps['after_move']:
dcf64d43 3011 info = self.run_pp(pp, info)
23c1a667 3012 return info
c1c9a79c 3013
5db07df6 3014 def _make_archive_id(self, info_dict):
e9fef7ee 3015 video_id = info_dict.get('id')
3016 if not video_id:
3017 return
5db07df6 3018 # Future-proof against any change in case
3019 # and backwards compatibility with prior versions
e9fef7ee 3020 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3021 if extractor is None:
1211bb6d 3022 url = str_or_none(info_dict.get('url'))
3023 if not url:
3024 return
e9fef7ee 3025 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3026 for ie_key, ie in self._ies.items():
1211bb6d 3027 if ie.suitable(url):
8b7491c8 3028 extractor = ie_key
e9fef7ee 3029 break
3030 else:
3031 return
d0757229 3032 return '%s %s' % (extractor.lower(), video_id)
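# Example: {'extractor_key': 'Youtube', 'id': 'abc123'} -> 'youtube abc123',
# which is also the exact line format used in the download_archive file below.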
5db07df6 3033
3034 def in_download_archive(self, info_dict):
3035 fn = self.params.get('download_archive')
3036 if fn is None:
3037 return False
3038
3039 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3040 if not vid_id:
7012b23c 3041 return False # Incomplete video information
5db07df6 3042
a45e8619 3043 return vid_id in self.archive
c1c9a79c 3044
3045 def record_download_archive(self, info_dict):
3046 fn = self.params.get('download_archive')
3047 if fn is None:
3048 return
5db07df6 3049 vid_id = self._make_archive_id(info_dict)
3050 assert vid_id
c1c9a79c 3051 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3052 archive_file.write(vid_id + '\n')
a45e8619 3053 self.archive.add(vid_id)
dd82ffea 3054
8c51aa65 3055 @staticmethod
8abeeb94 3056 def format_resolution(format, default='unknown'):
9359f3d4 3057 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3058 return 'audio only'
f49d89ee 3059 if format.get('resolution') is not None:
3060 return format['resolution']
35615307 3061 if format.get('width') and format.get('height'):
3062 res = '%dx%d' % (format['width'], format['height'])
3063 elif format.get('height'):
3064 res = '%sp' % format['height']
3065 elif format.get('width'):
388ae76b 3066 res = '%dx?' % format['width']
8c51aa65 3067 else:
8abeeb94 3068 res = default
9359f3d4 3069 if format.get('vcodec') == 'none' and format.get('acodec') == 'none':
3070 res += ' (images)'
8c51aa65 3071 return res
3072
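# Examples: {'width': 1920, 'height': 1080} -> '1920x1080', {'height': 720} -> '720p',
# an audio-only format -> 'audio only', and a storyboard format (neither audio
# nor video codec) gets ' (images)' appended.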
c57f7757 3073 def _format_note(self, fdict):
3074 res = ''
3075 if fdict.get('ext') in ['f4f', 'f4m']:
3076 res += '(unsupported) '
32f90364 3077 if fdict.get('language'):
3078 if res:
3079 res += ' '
9016d76f 3080 res += '[%s] ' % fdict['language']
c57f7757 3081 if fdict.get('format_note') is not None:
3082 res += fdict['format_note'] + ' '
3083 if fdict.get('tbr') is not None:
3084 res += '%4dk ' % fdict['tbr']
3085 if fdict.get('container') is not None:
3086 if res:
3087 res += ', '
3088 res += '%s container' % fdict['container']
3089bc74 3089 if (fdict.get('vcodec') is not None
3090 and fdict.get('vcodec') != 'none'):
c57f7757 3091 if res:
3092 res += ', '
3093 res += fdict['vcodec']
91c7271a 3094 if fdict.get('vbr') is not None:
c57f7757 3095 res += '@'
3096 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3097 res += 'video@'
3098 if fdict.get('vbr') is not None:
3099 res += '%4dk' % fdict['vbr']
fbb21cf5 3100 if fdict.get('fps') is not None:
5d583bdf 3101 if res:
3102 res += ', '
3103 res += '%sfps' % fdict['fps']
c57f7757 3104 if fdict.get('acodec') is not None:
3105 if res:
3106 res += ', '
3107 if fdict['acodec'] == 'none':
3108 res += 'video only'
3109 else:
3110 res += '%-5s' % fdict['acodec']
3111 elif fdict.get('abr') is not None:
3112 if res:
3113 res += ', '
3114 res += 'audio'
3115 if fdict.get('abr') is not None:
3116 res += '@%3dk' % fdict['abr']
3117 if fdict.get('asr') is not None:
3118 res += ' (%5dHz)' % fdict['asr']
3119 if fdict.get('filesize') is not None:
3120 if res:
3121 res += ', '
3122 res += format_bytes(fdict['filesize'])
9732d77e 3123 elif fdict.get('filesize_approx') is not None:
3124 if res:
3125 res += ', '
3126 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3127 return res
91c7271a 3128
c57f7757 3129 def list_formats(self, info_dict):
94badb25 3130 formats = info_dict.get('formats', [info_dict])
53ed7066 3131 new_format = (
3132 'list-formats' not in self.params.get('compat_opts', [])
169dbde9 3133 and self.params.get('listformats_table', True) is not False)
76d321f6 3134 if new_format:
3135 table = [
3136 [
3137 format_field(f, 'format_id'),
3138 format_field(f, 'ext'),
3139 self.format_resolution(f),
3140 format_field(f, 'fps', '%d'),
3141 '|',
3142 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3143 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3144 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
76d321f6 3145 '|',
3146 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3147 format_field(f, 'vbr', '%4dk'),
3148 format_field(f, 'acodec', default='unknown').replace('none', ''),
3149 format_field(f, 'abr', '%3dk'),
3150 format_field(f, 'asr', '%5dHz'),
3f698246 3151 ', '.join(filter(None, (
3152 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3153 format_field(f, 'language', '[%s]'),
3154 format_field(f, 'format_note'),
3155 format_field(f, 'container', ignore=(None, f.get('ext'))),
ea05b302 3156 ))),
3f698246 3157 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
76d321f6 3158 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3f698246 3159 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
76d321f6 3160 else:
3161 table = [
3162 [
3163 format_field(f, 'format_id'),
3164 format_field(f, 'ext'),
3165 self.format_resolution(f),
3166 self._format_note(f)]
3167 for f in formats
3168 if f.get('preference') is None or f['preference'] >= -1000]
3169 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3170
cfb56d1a 3171 self.to_screen(
169dbde9 3172 '[info] Available formats for %s:' % info_dict['id'])
3173 self.to_stdout(render_table(
bc97cdae 3174 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a 3175
3176 def list_thumbnails(self, info_dict):
b0249bca 3177 thumbnails = list(info_dict.get('thumbnails'))
cfb56d1a 3178 if not thumbnails:
b7b72db9 3179 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3180 return
cfb56d1a 3181
3182 self.to_screen(
3183 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3184 self.to_stdout(render_table(
cfb56d1a 3185 ['ID', 'width', 'height', 'URL'],
3186 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3187
360e1ca5 3188 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3189 if not subtitles:
360e1ca5 3190 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3191 return
a504ced0 3192 self.to_screen(
edab9dbf 3193 'Available %s for %s:' % (name, video_id))
2412044c 3194
3195 def _row(lang, formats):
49c258e1 3196 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3197 if len(set(names)) == 1:
7aee40c1 3198 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3199 return [lang, ', '.join(names), ', '.join(exts)]
3200
169dbde9 3201 self.to_stdout(render_table(
2412044c 3202 ['Language', 'Name', 'Formats'],
3203 [_row(lang, formats) for lang, formats in subtitles.items()],
3204 hideEmpty=True))
a504ced0 3205
dca08720 3206 def urlopen(self, req):
3207 """ Start an HTTP download """
82d8a8b6 3208 if isinstance(req, compat_basestring):
67dda517 3209 req = sanitized_Request(req)
19a41fc6 3210 return self._opener.open(req, timeout=self._socket_timeout)
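# urlopen() accepts either a plain URL string (wrapped into a sanitized Request)
# or a prepared Request object; requests go through self._opener built in
# _setup_opener() below, using the configured socket_timeout (600s if unset).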
dca08720 3211
3212 def print_debug_header(self):
3213 if not self.params.get('verbose'):
3214 return
62fec3b2 3215
c6afed48 3216 stdout_encoding = getattr(
3217 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3218 encoding_str = (
734f90bb 3219 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3220 locale.getpreferredencoding(),
3221 sys.getfilesystemencoding(),
c6afed48 3222 stdout_encoding,
b0472057 3223 self.get_encoding()))
4192b51c 3224 write_string(encoding_str, encoding=None)
734f90bb 3225
4c88ff87 3226 source = detect_variant()
3227 self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
e0986e31 3228 if _LAZY_LOADER:
f74980cb 3229 self._write_string('[debug] Lazy loading extractors enabled\n')
3ae5e797 3230 if plugin_extractors or plugin_postprocessors:
3231 self._write_string('[debug] Plugins: %s\n' % [
3232 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3233 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3234 if self.params.get('compat_opts'):
3235 self._write_string(
3236 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720 3237 try:
3238 sp = subprocess.Popen(
3239 ['git', 'rev-parse', '--short', 'HEAD'],
3240 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3241 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3242 out, err = process_communicate_or_kill(sp)
dca08720 3243 out = out.decode().strip()
3244 if re.match('[0-9a-f]+', out):
f74980cb 3245 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3246 except Exception:
dca08720 3247 try:
3248 sys.exc_clear()
70a1165b 3249 except Exception:
dca08720 3250 pass
b300cda4 3251
3252 def python_implementation():
3253 impl_name = platform.python_implementation()
3254 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3255 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3256 return impl_name
3257
e5813e53 3258 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3259 platform.python_version(),
3260 python_implementation(),
3261 platform.architecture()[0],
b300cda4 3262 platform_name()))
d28b5171 3263
73fac4e9 3264 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3265 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3266 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3267 exe_str = ', '.join(
2831b468 3268 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3269 ) or 'none'
d28b5171 3270 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720 3271
2831b468 3272 from .downloader.websocket import has_websockets
3273 from .postprocessor.embedthumbnail import has_mutagen
3274 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3275
ad3dc496 3276 lib_str = ', '.join(sorted(filter(None, (
edf65256 3277 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
2831b468 3278 has_websockets and 'websockets',
3279 has_mutagen and 'mutagen',
3280 SQLITE_AVAILABLE and 'sqlite',
3281 KEYRING_AVAILABLE and 'keyring',
ad3dc496 3282 )))) or 'none'
2831b468 3283 self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3284
dca08720 3285 proxy_map = {}
3286 for handler in self._opener.handlers:
3287 if hasattr(handler, 'proxies'):
3288 proxy_map.update(handler.proxies)
734f90bb 3289 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3290
58b1f00d 3291 if self.params.get('call_home', False):
3292 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3293 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3294 return
58b1f00d 3295 latest_version = self.urlopen(
3296 'https://yt-dl.org/latest/version').read().decode('utf-8')
3297 if version_tuple(latest_version) > version_tuple(__version__):
3298 self.report_warning(
3299 'You are using an outdated version (newest version: %s)! '
3300 'See https://yt-dl.org/update if you need help updating.' %
3301 latest_version)
3302
e344693b 3303 def _setup_opener(self):
6ad14cab 3304 timeout_val = self.params.get('socket_timeout')
19a41fc6 3305 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3306
982ee69a 3307 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720 3308 opts_cookiefile = self.params.get('cookiefile')
3309 opts_proxy = self.params.get('proxy')
3310
982ee69a 3311 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3312
6a3f4c3f 3313 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720 3314 if opts_proxy is not None:
3315 if opts_proxy == '':
3316 proxies = {}
3317 else:
3318 proxies = {'http': opts_proxy, 'https': opts_proxy}
3319 else:
3320 proxies = compat_urllib_request.getproxies()
067aa17e 3321 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720 3322 if 'http' in proxies and 'https' not in proxies:
3323 proxies['https'] = proxies['http']
91410c9b 3324 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2 3325
3326 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d 3327 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3328 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3329 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3330 data_handler = compat_urllib_request_DataHandler()
6240b0a2 3331
3332 # When passing our own FileHandler instance, build_opener won't add the
3333 # default FileHandler and allows us to disable the file protocol, which
3334 # can be used for malicious purposes (see
067aa17e 3335 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2 3336 file_handler = compat_urllib_request.FileHandler()
3337
3338 def file_open(*args, **kwargs):
7a5c1cfe 3339 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2 3340 file_handler.file_open = file_open
3341
3342 opener = compat_urllib_request.build_opener(
fca6dba8 3343 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3344
dca08720 3345 # Delete the default user-agent header, which would otherwise apply in
3346 # cases where our custom HTTP handler doesn't come into play
067aa17e 3347 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720 3348 opener.addheaders = []
3349 self._opener = opener
62fec3b2 3350
3351 def encode(self, s):
3352 if isinstance(s, bytes):
3353 return s # Already encoded
3354
3355 try:
3356 return s.encode(self.get_encoding())
3357 except UnicodeEncodeError as err:
3358 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3359 raise
3360
3361 def get_encoding(self):
3362 encoding = self.params.get('encoding')
3363 if encoding is None:
3364 encoding = preferredencoding()
3365 return encoding
ec82d85a 3366
80c03fa9 3367 def _write_info_json(self, label, ie_result, infofn):
3368 ''' Write infojson and returns True = written, False = skip, None = error '''
3369 if not self.params.get('writeinfojson'):
3370 return False
3371 elif not infofn:
3372 self.write_debug(f'Skipping writing {label} infojson')
3373 return False
3374 elif not self._ensure_dir_exists(infofn):
3375 return None
3376 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3377 self.to_screen(f'[info] {label.title()} metadata is already present')
3378 else:
3379 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3380 try:
3381 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3382 except (OSError, IOError):
3383 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3384 return None
3385 return True
3386
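# The True/False/None return convention above (written / skipped / error) is
# shared with _write_description below; a None result is treated as an error by
# the caller (e.g. process_info aborts the video when this returns None, as seen
# near the top of this section).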
3387 def _write_description(self, label, ie_result, descfn):
3388 ''' Write description and returns True = written, False = skip, None = error '''
3389 if not self.params.get('writedescription'):
3390 return False
3391 elif not descfn:
3392 self.write_debug(f'Skipping writing {label} description')
3393 return False
3394 elif not self._ensure_dir_exists(descfn):
3395 return None
3396 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3397 self.to_screen(f'[info] {label.title()} description is already present')
3398 elif ie_result.get('description') is None:
3399 self.report_warning(f'There\'s no {label} description to write')
3400 return False
3401 else:
3402 try:
3403 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3404 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3405 descfile.write(ie_result['description'])
3406 except (OSError, IOError):
3407 self.report_error(f'Cannot write {label} description file {descfn}')
3408 return None
3409 return True
3410
3411 def _write_subtitles(self, info_dict, filename):
3412 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3413 ret = []
3414 subtitles = info_dict.get('requested_subtitles')
3415 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3416 # Subtitle download errors are already handled in the relevant IE,
3417 # so processing silently continues when used with an IE that does not support them
3418 return ret
3419
3420 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3421 if not sub_filename_base:
3422 self.to_screen('[info] Skipping writing video subtitles')
3423 return ret
3424 for sub_lang, sub_info in subtitles.items():
3425 sub_format = sub_info['ext']
3426 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3427 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3428 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3429 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3430 sub_info['filepath'] = sub_filename
3431 ret.append((sub_filename, sub_filename_final))
3432 continue
3433
3434 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3435 if sub_info.get('data') is not None:
3436 try:
3437 # Use newline='' to prevent conversion of newline characters
3438 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3439 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3440 subfile.write(sub_info['data'])
3441 sub_info['filepath'] = sub_filename
3442 ret.append((sub_filename, sub_filename_final))
3443 continue
3444 except (OSError, IOError):
3445 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3446 return None
3447
3448 try:
3449 sub_copy = sub_info.copy()
3450 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3451 self.dl(sub_filename, sub_copy, subtitle=True)
3452 sub_info['filepath'] = sub_filename
3453 ret.append((sub_filename, sub_filename_final))
3454 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3455 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3456 continue
519804a9 3457 return ret
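# Note: a failure while writing inline subtitle data aborts with None (an error
# for the caller), whereas a failed download of an external subtitle URL only
# warns and continues with the next language.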
80c03fa9 3458
3459 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3460 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3461 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3462 thumbnails, ret = [], []
6c4fd172 3463 if write_all or self.params.get('writethumbnail', False):
0202b52a 3464 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3465 multiple = write_all and len(thumbnails) > 1
ec82d85a 3466
80c03fa9 3467 if thumb_filename_base is None:
3468 thumb_filename_base = filename
3469 if thumbnails and not thumb_filename_base:
3470 self.write_debug(f'Skipping writing {label} thumbnail')
3471 return ret
3472
981052c9 3473 for t in thumbnails[::-1]:
80c03fa9 3474 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3475 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3476 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3477 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3478
80c03fa9 3479 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3480 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3481 t['filepath'] = thumb_filename
80c03fa9 3482 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
ec82d85a 3483 else:
80c03fa9 3484 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3485 try:
3486 uf = self.urlopen(t['url'])
80c03fa9 3487 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3488 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3489 shutil.copyfileobj(uf, thumbf)
80c03fa9 3490 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3491 t['filepath'] = thumb_filename
3158150c 3492 except network_exceptions as err:
80c03fa9 3493 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3494 if ret and not write_all:
3495 break
0202b52a 3496 return ret
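# Thumbnails are tried best-first (the list is iterated in reverse) and, unless
# write_all_thumbnails is set, the loop stops after the first thumbnail that is
# already present or successfully downloaded.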