cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
524e2e4f 28import unicodedata
8222d8de 29
961ea474
S
30from string import ascii_letters
31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
edf65256 38 compat_pycrypto_AES,
7d1eb38a 39 compat_shlex_quote,
ce02ed60 40 compat_str,
67134eab 41 compat_tokenize_tokenize,
ce02ed60
PH
42 compat_urllib_error,
43 compat_urllib_request,
8b172c2e 44 compat_urllib_request_DataHandler,
8c25f81b 45)
982ee69a 46from .cookies import load_cookies
8c25f81b 47from .utils import (
eedb7ba5
S
48 age_restricted,
49 args_to_str,
ce02ed60
PH
50 ContentTooShortError,
51 date_from_str,
52 DateRange,
acd69589 53 DEFAULT_OUTTMPL,
ce02ed60 54 determine_ext,
b5559424 55 determine_protocol,
732044af 56 DOT_DESKTOP_LINK_TEMPLATE,
57 DOT_URL_LINK_TEMPLATE,
58 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 59 DownloadError,
c0384f22 60 encode_compat_str,
ce02ed60 61 encodeFilename,
498f5606 62 EntryNotInPlaylist,
a06916d9 63 error_to_compat_str,
8b0d7497 64 ExistingVideoReached,
590bc6f6 65 expand_path,
ce02ed60 66 ExtractorError,
e29663c6 67 float_or_none,
02dbf93f 68 format_bytes,
76d321f6 69 format_field,
901130bb 70 STR_FORMAT_RE_TMPL,
71 STR_FORMAT_TYPES,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
b0249bca 74 HEADRequest,
c9969434 75 int_or_none,
732044af 76 iri_to_uri,
773f291d 77 ISO3166Utils,
56a8fb4f 78 LazyList,
ce02ed60 79 locked_file,
0202b52a 80 make_dir,
dca08720 81 make_HTTPS_handler,
ce02ed60 82 MaxDownloadsReached,
3158150c 83 network_exceptions,
cd6fc19e 84 orderedSet,
a06916d9 85 OUTTMPL_TYPES,
b7ab0590 86 PagedList,
083c9df9 87 parse_filesize,
91410c9b 88 PerRequestProxyHandler,
dca08720 89 platform_name,
eedb7ba5 90 PostProcessingError,
ce02ed60 91 preferredencoding,
eedb7ba5 92 prepend_extension,
a06916d9 93 process_communicate_or_kill,
51fb4995 94 register_socks_protocols,
a06916d9 95 RejectedVideoReached,
cfb56d1a 96 render_table,
eedb7ba5 97 replace_extension,
ce02ed60
PH
98 SameFileError,
99 sanitize_filename,
1bb5c511 100 sanitize_path,
dcf77cf1 101 sanitize_url,
67dda517 102 sanitized_Request,
e5660ee6 103 std_headers,
1211bb6d 104 str_or_none,
e29663c6 105 strftime_or_none,
ce02ed60 106 subtitles_filename,
51d9739f 107 ThrottledDownload,
732044af 108 to_high_limit_path,
324ad820 109 traverse_obj,
6033d980 110 try_get,
ce02ed60 111 UnavailableVideoError,
29eb5174 112 url_basename,
7d1eb38a 113 variadic,
58b1f00d 114 version_tuple,
ce02ed60
PH
115 write_json_file,
116 write_string,
6a3f4c3f 117 YoutubeDLCookieProcessor,
dca08720 118 YoutubeDLHandler,
fca6dba8 119 YoutubeDLRedirectHandler,
ce02ed60 120)
a0e07d31 121from .cache import Cache
52a8a1e1 122from .extractor import (
123 gen_extractor_classes,
124 get_info_extractor,
125 _LAZY_LOADER,
3ae5e797 126 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 127)
4c54b89e 128from .extractor.openload import PhantomJSwrapper
52a8a1e1 129from .downloader import (
dbf5416a 130 FFmpegFD,
52a8a1e1 131 get_suitable_downloader,
132 shorten_protocol_name
133)
4c83c967 134from .downloader.rtmp import rtmpdump_version
4f026faf 135from .postprocessor import (
e36d50c5 136 get_postprocessor,
137 FFmpegFixupDurationPP,
f17f8651 138 FFmpegFixupM3u8PP,
62cd676c 139 FFmpegFixupM4aPP,
6271f1ca 140 FFmpegFixupStretchedPP,
e36d50c5 141 FFmpegFixupTimestampPP,
4f026faf
PH
142 FFmpegMergerPP,
143 FFmpegPostProcessor,
0202b52a 144 MoveFilesAfterDownloadPP,
3ae5e797 145 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 146)
4c88ff87 147from .update import detect_variant
dca08720 148from .version import __version__
8222d8de 149
e9c0cdd3
YCH
150if compat_os_name == 'nt':
151 import ctypes
152
2459b6e1 153
8222d8de
JMF
154class YoutubeDL(object):
155 """YoutubeDL class.
156
157 YoutubeDL objects are the ones responsible for downloading the
158 actual video file and writing it to disk if the user has requested
159 it, among some other tasks. In most cases there should be one per
160 program. Since, given a video URL, the downloader doesn't know how
161 to extract all the needed information (a task that InfoExtractors
162 do), it has to pass the URL to one of them.
163
164 For this, YoutubeDL objects have a method that allows
165 InfoExtractors to be registered in a given order. When it is passed
166 a URL, the YoutubeDL object hands it to the first InfoExtractor it
167 finds that reports being able to handle it. The InfoExtractor extracts
168 all the information about the video or videos the URL refers to, and
169 YoutubeDL processes the extracted information, possibly using a File
170 Downloader to download the video.
171
172 YoutubeDL objects accept a lot of parameters. In order not to saturate
173 the object constructor with arguments, it receives a dictionary of
174 options instead. These options are available through the params
175 attribute for the InfoExtractors to use. The YoutubeDL also
176 registers itself as the downloader in charge of the InfoExtractors
177 that are added to it, so this is a "mutual registration".
178
179 Available options:
180
181 username: Username for authentication purposes.
182 password: Password for authentication purposes.
180940e0 183 videopassword: Password for accessing a video.
1da50aa3
S
184 ap_mso: Adobe Pass multiple-system operator identifier.
185 ap_username: Multiple-system operator account username.
186 ap_password: Multiple-system operator account password.
8222d8de
JMF
187 usenetrc: Use netrc for authentication instead.
188 verbose: Print additional info to stdout.
189 quiet: Do not print messages to stdout.
ad8915b7 190 no_warnings: Do not print out anything for warnings.
53c18592 191 forceprint: A list of templates to force print
192 forceurl: Force printing final URL. (Deprecated)
193 forcetitle: Force printing title. (Deprecated)
194 forceid: Force printing ID. (Deprecated)
195 forcethumbnail: Force printing thumbnail URL. (Deprecated)
196 forcedescription: Force printing description. (Deprecated)
197 forcefilename: Force printing final filename. (Deprecated)
198 forceduration: Force printing duration. (Deprecated)
8694c600 199 forcejson: Force printing info_dict as JSON.
63e0be34
PH
200 dump_single_json: Force printing the info_dict of the whole playlist
201 (or video) as a single JSON line.
c25228e5 202 force_write_download_archive: Force writing download archive regardless
203 of 'skip_download' or 'simulate'.
b7b04c78 204 simulate: Do not download the video files. If unset (or None),
205 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 206 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 207 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 208 ignore_no_formats_error: Ignore "No video formats" error. Useful for
209 extracting metadata even if the video is not actually
210 available for download (experimental)
c25228e5 211 format_sort: How to sort the video formats. see "Sorting Formats"
212 for more details.
213 format_sort_force: Force the given format_sort. see "Sorting Formats"
214 for more details.
215 allow_multiple_video_streams: Allow multiple video streams to be merged
216 into a single file
217 allow_multiple_audio_streams: Allow multiple audio streams to be merged
218 into a single file
0ba692ac 219 check_formats: Whether to test if the formats are downloadable.
220 Can be True (check all), False (check none)
221 or None (check only if requested by extractor)
4524baf0 222 paths: Dictionary of output paths. The allowed keys are 'home',
223 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 224 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 225 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 226 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
227 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
228 restrictfilenames: Do not allow "&" and spaces in file names
229 trim_file_name: Limit length of filename (extension excluded)
4524baf0 230 windowsfilenames: Force the filenames to be windows compatible
b1940459 231 ignoreerrors: Do not stop on download/postprocessing errors.
232 Can be 'only_download' to ignore only download errors.
233 Default is 'only_download' for CLI, but False for API
26e2805c 234 skip_playlist_after_errors: Number of allowed failures until the rest of
235 the playlist is skipped
d22dec74 236 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 237 overwrites: Overwrite all video and metadata files if True,
238 overwrite only non-video files if None
239 and don't overwrite any file if False
34488702 240 For compatibility with youtube-dl,
241 "nooverwrites" may also be used instead
8222d8de
JMF
242 playliststart: Playlist item to start at.
243 playlistend: Playlist item to end at.
c14e88f0 244 playlist_items: Specific indices of playlist to download.
ff815fe6 245 playlistreverse: Download playlist items in reverse order.
75822ca7 246 playlistrandom: Download playlist items in random order.
8222d8de
JMF
247 matchtitle: Download only matching titles.
248 rejecttitle: Reject downloads for matching titles.
8bf9319e 249 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
250 logtostderr: Log messages to stderr instead of stdout.
251 writedescription: Write the video description to a .description file
252 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 253 clean_infojson: Remove private fields from the infojson
34488702 254 getcomments: Extract video comments. These will not be written to disk
06167fbb 255 unless writeinfojson is also given
1fb07d10 256 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 257 writethumbnail: Write the thumbnail image to a file
c25228e5 258 allow_playlist_files: Whether to write playlists' description, infojson etc
259 also to disk when using the 'write*' options
ec82d85a 260 write_all_thumbnails: Write all thumbnail formats to files
732044af 261 writelink: Write an internet shortcut file, depending on the
262 current platform (.url/.webloc/.desktop)
263 writeurllink: Write a Windows internet shortcut file (.url)
264 writewebloclink: Write a macOS internet shortcut file (.webloc)
265 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 266 writesubtitles: Write the video subtitles to a file
741dd8ea 267 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 268 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 269 Downloads all the subtitles of the video
0b7f3118 270 (requires writesubtitles or writeautomaticsub)
8222d8de 271 listsubtitles: Lists all available subtitles for the video
a504ced0 272 subtitlesformat: The format code for subtitles
c32b0aab 273 subtitleslangs: List of languages of the subtitles to download (can be regex).
274 The list may contain "all" to refer to all the available
275 subtitles. The language can be prefixed with a "-" to
276 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
277 keepvideo: Keep the video file after post-processing
278 daterange: A DateRange object, download only if the upload_date is in the range.
279 skip_download: Skip the actual download of the video file
c35f9e72 280 cachedir: Location of the cache files in the filesystem.
a0e07d31 281 False to disable filesystem cache.
47192f92 282 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
283 age_limit: An integer representing the user's age in years.
284 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
285 min_views: An integer representing the minimum view count the video
286 must have in order to not be skipped.
287 Videos without view count information are always
288 downloaded. None for no limit.
289 max_views: An integer representing the maximum view count.
290 Videos that are more popular than that are not
291 downloaded.
292 Videos without view count information are always
293 downloaded. None for no limit.
294 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
295 Videos already present in the file are not downloaded
296 again.
8a51f564 297 break_on_existing: Stop the download process after attempting to download a
298 file that is in the archive.
299 break_on_reject: Stop the download process when encountering a video that
300 has been filtered out.
301 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
302 cookiesfrombrowser: A tuple containing the name of the browser and the profile
303 name/path from where cookies are loaded.
304 Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 305 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
306 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
307 At the moment, this is only supported by YouTube.
a1ee09e8 308 proxy: URL of the proxy server to use
38cce791 309 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 310 on geo-restricted sites.
e344693b 311 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
312 bidi_workaround: Work around buggy terminals without bidirectional text
313 support, using fribidi
a0ddb8a2 314 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 315 include_ads: Download ads as well
04b4d394
PH
316 default_search: Prepend this string if an input url is not valid.
317 'auto' for elaborate guessing
62fec3b2 318 encoding: Use this encoding instead of the system-specified.
e8ee972c 319 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
320 Pass in 'in_playlist' to only show this behavior for
321 playlist items.
4f026faf 322 postprocessors: A list of dictionaries, each with an entry
71b640cc 323 * key: The name of the postprocessor. See
7a5c1cfe 324 yt_dlp/postprocessor/__init__.py for a list.
56d868db 325 * when: When to run the postprocessor. Can be one of
326 pre_process|before_dl|post_process|after_move.
327 Assumed to be 'post_process' if not given
ab8e5e51
AM
328 post_hooks: A list of functions that get called as the final step
329 for each video file, after all postprocessors have been
330 called. The filename will be passed as the only argument.
71b640cc
PH
331 progress_hooks: A list of functions that get called on download
332 progress, with a dictionary with the entries
5cda4eda 333 * status: One of "downloading", "error", or "finished".
ee69b99a 334 Check this first and ignore unknown values.
3ba7740d 335 * info_dict: The extracted info_dict
71b640cc 336
5cda4eda 337 If status is one of "downloading", or "finished", the
ee69b99a
PH
338 following properties may also be present:
339 * filename: The final filename (always present)
5cda4eda 340 * tmpfilename: The filename we're currently writing to
71b640cc
PH
341 * downloaded_bytes: Bytes on disk
342 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
343 * total_bytes_estimate: Guess of the eventual file size,
344 None if unavailable.
345 * elapsed: The number of seconds since download started.
71b640cc
PH
346 * eta: The estimated time in seconds, None if unknown
347 * speed: The download speed in bytes/second, None if
348 unknown
5cda4eda
PH
349 * fragment_index: The counter of the currently
350 downloaded video fragment.
351 * fragment_count: The number of fragments (= individual
352 files that will be merged)
71b640cc
PH
353
354 Progress hooks are guaranteed to be called at least once
355 (with status "finished") if the download is successful.
45598f15 356 merge_output_format: Extension to use when merging formats.
6b591b29 357 final_ext: Expected final extension; used to detect when the file was
358 already downloaded and converted. "merge_output_format" is
359 replaced by this extension when given
6271f1ca
PH
360 fixup: Automatically correct known faults of the file.
361 One of:
362 - "never": do nothing
363 - "warn": only emit a warning
364 - "detect_or_warn": check whether we can do anything
62cd676c 365 about it, warn otherwise (default)
504f20dd 366 source_address: Client-side IP address to bind to.
6ec6cb4e 367 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 368 yt-dlp servers for debugging. (BROKEN)
1cf376f5 369 sleep_interval_requests: Number of seconds to sleep between requests
370 during extraction
7aa589a5
S
371 sleep_interval: Number of seconds to sleep before each download when
372 used alone or a lower bound of a range for randomized
373 sleep before each download (minimum possible number
374 of seconds to sleep) when used along with
375 max_sleep_interval.
376 max_sleep_interval:Upper bound of a range for randomized sleep before each
377 download (maximum possible number of seconds to sleep).
378 Must only be used along with sleep_interval.
379 Actual sleep time will be a random float from range
380 [sleep_interval; max_sleep_interval].
1cf376f5 381 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
382 listformats: Print an overview of available video formats and exit.
383 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
384 match_filter: A function that gets called with the info_dict of
385 every video.
386 If it returns a message, the video is ignored.
387 If it returns None, the video is downloaded.
388 match_filter_func in utils.py is one example for this.
7e5db8c9 389 no_color: Do not emit color codes in output.
0a840f58 390 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 391 HTTP header
0a840f58 392 geo_bypass_country:
773f291d
S
393 Two-letter ISO 3166-2 country code that will be used for
394 explicit geographic restriction bypassing via faking
504f20dd 395 X-Forwarded-For HTTP header
5f95927a
S
396 geo_bypass_ip_block:
397 IP range in CIDR notation that will be used similarly to
504f20dd 398 geo_bypass_country
71b640cc 399
85729c51 400 The following options determine which downloader is picked:
52a8a1e1 401 external_downloader: A dictionary of protocol keys and the executable of the
402 external downloader to use for it. The allowed protocols
403 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
404 Set the value to 'native' to use the native downloader
405 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
406 or {'m3u8': 'ffmpeg'} instead.
407 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
408 if True, otherwise use ffmpeg/avconv if False, otherwise
409 use downloader suggested by extractor if None.
53ed7066 410 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 411 The following options do not work when used through the API:
412 filename, abort-on-error, multistreams, no-live-chat,
b51d2ae3 413 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 414 Refer to __init__.py for their implementation
fe7e0c98 415
8222d8de 416 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 417 the downloader (see yt_dlp/downloader/common.py):
51d9739f 418 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
419 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
420 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67
JMF
421
422 The following options are used by the post processors:
d4a24f40 423 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 424 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
425 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
426 to the binary or its containing directory.
43820c03 427 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 428 and a list of additional command-line arguments for the
429 postprocessor/executable. The dict can also have "PP+EXE" keys
430 which are used when the given exe is used by the given PP.
431 Use 'default' as the name for arguments to be passed to all PP
432 For compatibility with youtube-dl, a single list of args
433 can also be used
e409895f 434
435 The following options are used by the extractors:
62bff2c1 436 extractor_retries: Number of times to retry for known errors
437 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 438 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 439 discontinuities such as ad breaks (default: False)
5d3a0e79 440 extractor_args: A dictionary of arguments to be passed to the extractors.
441 See "EXTRACTOR ARGUMENTS" for details.
442 Eg: {'youtube': {'skip': ['dash', 'hls']}}
443 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
444 If True (default), DASH manifests and related
62bff2c1 445 data will be downloaded and processed by extractor.
446 You can reduce network I/O by disabling it if you don't
447 care about DASH. (only for youtube)
5d3a0e79 448 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
449 If True (default), HLS manifests and related
62bff2c1 450 data will be downloaded and processed by extractor.
451 You can reduce network I/O by disabling it if you don't
452 care about HLS. (only for youtube)
8222d8de
JMF
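    A minimal usage sketch (illustrative only; the option values and the
    URL below are arbitrary examples, not defaults):

        ydl_opts = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
            'postprocessors': [{'key': 'FFmpegMetadata', 'when': 'post_process'}],
            'progress_hooks': [lambda d: print(d['status'])],
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=...'])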
453 """
454
c9969434
S
455 _NUMERIC_FIELDS = set((
456 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
457 'timestamp', 'upload_year', 'upload_month', 'upload_day',
458 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
459 'average_rating', 'comment_count', 'age_limit',
460 'start_time', 'end_time',
461 'chapter_number', 'season_number', 'episode_number',
462 'track_number', 'disc_number', 'release_year',
463 'playlist_index',
464 ))
465
8222d8de 466 params = None
8b7491c8 467 _ies = {}
56d868db 468 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 469 _printed_messages = set()
1cf376f5 470 _first_webpage_request = True
8222d8de
JMF
471 _download_retcode = None
472 _num_downloads = None
30a074c2 473 _playlist_level = 0
474 _playlist_urls = set()
8222d8de
JMF
475 _screen_file = None
476
3511266b 477 def __init__(self, params=None, auto_init=True):
8222d8de 478 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
479 if params is None:
480 params = {}
8b7491c8 481 self._ies = {}
56c73665 482 self._ies_instances = {}
56d868db 483 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 484 self._printed_messages = set()
1cf376f5 485 self._first_webpage_request = True
ab8e5e51 486 self._post_hooks = []
933605d7 487 self._progress_hooks = []
8222d8de
JMF
488 self._download_retcode = 0
489 self._num_downloads = 0
490 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 491 self._err_file = sys.stderr
4abf617b
S
492 self.params = {
493 # Default parameters
494 'nocheckcertificate': False,
495 }
496 self.params.update(params)
a0e07d31 497 self.cache = Cache(self)
34308b30 498
a61f4b28 499 if sys.version_info < (3, 6):
500 self.report_warning(
0181adef 501 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 502
88acdbc2 503 if self.params.get('allow_unplayable_formats'):
504 self.report_warning(
505 'You have asked for unplayable formats to be listed/downloaded. '
506 'This is a developer option intended for debugging. '
507 'If you experience any issues while using this option, DO NOT open a bug report')
508
be5df5ee
S
509 def check_deprecated(param, option, suggestion):
510 if self.params.get(param) is not None:
53ed7066 511 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee
S
512 return True
513 return False
514
515 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
516 if self.params.get('geo_verification_proxy') is None:
517 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
518
0d1bb027 519 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
520 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 521 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 522
523 for msg in self.params.get('warnings', []):
524 self.report_warning(msg)
525
b868936c 526 if self.params.get('overwrites') is None:
527 self.params.pop('overwrites', None)
528 elif self.params.get('nooverwrites') is not None:
529 # nooverwrites was unnecessarily changed to overwrites
530 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
531 # This ensures compatibility with both keys
532 self.params['overwrites'] = not self.params['nooverwrites']
533 else:
534 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 535
0783b09b 536 if params.get('bidi_workaround', False):
1c088fa8
PH
537 try:
538 import pty
539 master, slave = pty.openpty()
003c69a8 540 width = compat_get_terminal_size().columns
1c088fa8
PH
541 if width is None:
542 width_args = []
543 else:
544 width_args = ['-w', str(width)]
5d681e96 545 sp_kwargs = dict(
1c088fa8
PH
546 stdin=subprocess.PIPE,
547 stdout=slave,
548 stderr=self._err_file)
5d681e96
PH
549 try:
550 self._output_process = subprocess.Popen(
551 ['bidiv'] + width_args, **sp_kwargs
552 )
553 except OSError:
5d681e96
PH
554 self._output_process = subprocess.Popen(
555 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
556 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 557 except OSError as ose:
66e7ace1 558 if ose.errno == errno.ENOENT:
6febd1c1 559 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
560 else:
561 raise
0783b09b 562
3089bc74
S
563 if (sys.platform != 'win32'
564 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
565 and not params.get('restrictfilenames', False)):
e9137224 566 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 567 self.report_warning(
6febd1c1 568 'Assuming --restrict-filenames since file system encoding '
1b725173 569 'cannot encode all characters. '
6febd1c1 570 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 571 self.params['restrictfilenames'] = True
34308b30 572
de6000d9 573 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 574
187986a8 575 # Creating format selector here allows us to catch syntax errors before the extraction
576 self.format_selector = (
577 None if self.params.get('format') is None
578 else self.build_format_selector(self.params['format']))
579
dca08720
PH
580 self._setup_opener()
581
4cd0a709 582 """Preload the archive, if any is specified"""
583 def preload_download_archive(fn):
584 if fn is None:
585 return False
0760b0a7 586 self.write_debug('Loading archive file %r\n' % fn)
4cd0a709 587 try:
588 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
589 for line in archive_file:
590 self.archive.add(line.strip())
591 except IOError as ioe:
592 if ioe.errno != errno.ENOENT:
593 raise
594 return False
595 return True
596
597 self.archive = set()
598 preload_download_archive(self.params.get('download_archive'))
599
3511266b
PH
600 if auto_init:
601 self.print_debug_header()
602 self.add_default_info_extractors()
603
4f026faf 604 for pp_def_raw in self.params.get('postprocessors', []):
4f026faf 605 pp_def = dict(pp_def_raw)
fd7cfb64 606 when = pp_def.pop('when', 'post_process')
607 pp_class = get_postprocessor(pp_def.pop('key'))
4f026faf 608 pp = pp_class(self, **compat_kwargs(pp_def))
5bfa4862 609 self.add_post_processor(pp, when=when)
4f026faf 610
ab8e5e51
AM
611 for ph in self.params.get('post_hooks', []):
612 self.add_post_hook(ph)
613
71b640cc
PH
614 for ph in self.params.get('progress_hooks', []):
615 self.add_progress_hook(ph)
616
51fb4995
YCH
617 register_socks_protocols()
618
7d4111ed
PH
619 def warn_if_short_id(self, argv):
620 # short YouTube ID starting with dash?
621 idxs = [
622 i for i, a in enumerate(argv)
623 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
624 if idxs:
625 correct_argv = (
7a5c1cfe 626 ['yt-dlp']
3089bc74
S
627 + [a for i, a in enumerate(argv) if i not in idxs]
628 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
629 )
630 self.report_warning(
631 'Long argument string detected. '
632 'Use -- to separate parameters and URLs, like this:\n%s\n' %
633 args_to_str(correct_argv))
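        # Illustrative example of what this guards against (the ID below is
        # made up): an argument such as '-abcde12345' would otherwise be
        # parsed as an option, so the suggested command line becomes e.g.
        #   yt-dlp -- -abcde12345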
634
8222d8de
JMF
635 def add_info_extractor(self, ie):
636 """Add an InfoExtractor object to the end of the list."""
8b7491c8 637 ie_key = ie.ie_key()
638 self._ies[ie_key] = ie
e52d7f85 639 if not isinstance(ie, type):
8b7491c8 640 self._ies_instances[ie_key] = ie
e52d7f85 641 ie.set_downloader(self)
8222d8de 642
8b7491c8 643 def _get_info_extractor_class(self, ie_key):
644 ie = self._ies.get(ie_key)
645 if ie is None:
646 ie = get_info_extractor(ie_key)
647 self.add_info_extractor(ie)
648 return ie
649
56c73665
JMF
650 def get_info_extractor(self, ie_key):
651 """
652 Get an instance of an IE with name ie_key, it will try to get one from
653 the _ies list, if there's no instance it will create a new one and add
654 it to the extractor list.
655 """
656 ie = self._ies_instances.get(ie_key)
657 if ie is None:
658 ie = get_info_extractor(ie_key)()
659 self.add_info_extractor(ie)
660 return ie
661
023fa8c4
JMF
662 def add_default_info_extractors(self):
663 """
664 Add the InfoExtractors returned by gen_extractors to the end of the list
665 """
e52d7f85 666 for ie in gen_extractor_classes():
023fa8c4
JMF
667 self.add_info_extractor(ie)
668
56d868db 669 def add_post_processor(self, pp, when='post_process'):
8222d8de 670 """Add a PostProcessor object to the end of the chain."""
5bfa4862 671 self._pps[when].append(pp)
8222d8de
JMF
672 pp.set_downloader(self)
673
ab8e5e51
AM
674 def add_post_hook(self, ph):
675 """Add the post hook"""
676 self._post_hooks.append(ph)
677
933605d7
JMF
678 def add_progress_hook(self, ph):
679 """Add the progress hook (currently only for the file downloader)"""
680 self._progress_hooks.append(ph)
8ab470f1 681
1c088fa8 682 def _bidi_workaround(self, message):
5d681e96 683 if not hasattr(self, '_output_channel'):
1c088fa8
PH
684 return message
685
5d681e96 686 assert hasattr(self, '_output_process')
11b85ce6 687 assert isinstance(message, compat_str)
6febd1c1
PH
688 line_count = message.count('\n') + 1
689 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 690 self._output_process.stdin.flush()
6febd1c1 691 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 692 for _ in range(line_count))
6febd1c1 693 return res[:-len('\n')]
1c088fa8 694
b35496d8 695 def _write_string(self, message, out=None, only_once=False):
696 if only_once:
697 if message in self._printed_messages:
698 return
699 self._printed_messages.add(message)
700 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 701
848887eb 702 def to_stdout(self, message, skip_eol=False, quiet=False):
0760b0a7 703 """Print message to stdout"""
8bf9319e 704 if self.params.get('logger'):
43afe285 705 self.params['logger'].debug(message)
835a1478 706 elif not quiet or self.params.get('verbose'):
707 self._write_string(
708 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
709 self._err_file if quiet else self._screen_file)
8222d8de 710
b35496d8 711 def to_stderr(self, message, only_once=False):
0760b0a7 712 """Print message to stderr"""
11b85ce6 713 assert isinstance(message, compat_str)
8bf9319e 714 if self.params.get('logger'):
43afe285
IB
715 self.params['logger'].error(message)
716 else:
b35496d8 717 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
8222d8de 718
1e5b9a95
PH
719 def to_console_title(self, message):
720 if not self.params.get('consoletitle', False):
721 return
4bede0d8
C
722 if compat_os_name == 'nt':
723 if ctypes.windll.kernel32.GetConsoleWindow():
724 # c_wchar_p() might not be necessary if `message` is
725 # already of type unicode()
726 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 727 elif 'TERM' in os.environ:
b46696bd 728 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 729
bdde425c
PH
730 def save_console_title(self):
731 if not self.params.get('consoletitle', False):
732 return
b7b04c78 733 if self.params.get('simulate'):
94c3442e 734 return
4bede0d8 735 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 736 # Save the title on stack
734f90bb 737 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
738
739 def restore_console_title(self):
740 if not self.params.get('consoletitle', False):
741 return
b7b04c78 742 if self.params.get('simulate'):
94c3442e 743 return
4bede0d8 744 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 745 # Restore the title from stack
734f90bb 746 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
747
748 def __enter__(self):
749 self.save_console_title()
750 return self
751
752 def __exit__(self, *args):
753 self.restore_console_title()
f89197d7 754
dca08720 755 if self.params.get('cookiefile') is not None:
1bab3437 756 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 757
8222d8de
JMF
758 def trouble(self, message=None, tb=None):
759 """Determine action to take when a download problem appears.
760
761 Depending on whether the downloader has been configured to
762 ignore download errors or not, this method may raise an
763 exception when errors are found, after printing the message.
764
765 tb, if given, is additional traceback information.
766 """
767 if message is not None:
768 self.to_stderr(message)
769 if self.params.get('verbose'):
770 if tb is None:
771 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 772 tb = ''
8222d8de 773 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 774 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 775 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
776 else:
777 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 778 tb = ''.join(tb_data)
c19bc311 779 if tb:
780 self.to_stderr(tb)
b1940459 781 if not self.params.get('ignoreerrors'):
8222d8de
JMF
782 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
783 exc_info = sys.exc_info()[1].exc_info
784 else:
785 exc_info = sys.exc_info()
786 raise DownloadError(message, exc_info)
787 self._download_retcode = 1
788
0760b0a7 789 def to_screen(self, message, skip_eol=False):
790 """Print message to stdout if not in quiet mode"""
791 self.to_stdout(
792 message, skip_eol, quiet=self.params.get('quiet', False))
793
c84aeac6 794 def report_warning(self, message, only_once=False):
8222d8de
JMF
795 '''
796 Print the message to stderr; it will be prefixed with 'WARNING:'.
797 If stderr is a tty file, the 'WARNING:' will be colored
798 '''
6d07ce01
JMF
799 if self.params.get('logger') is not None:
800 self.params['logger'].warning(message)
8222d8de 801 else:
ad8915b7
PH
802 if self.params.get('no_warnings'):
803 return
e9c0cdd3 804 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6d07ce01
JMF
805 _msg_header = '\033[0;33mWARNING:\033[0m'
806 else:
807 _msg_header = 'WARNING:'
808 warning_message = '%s %s' % (_msg_header, message)
b35496d8 809 self.to_stderr(warning_message, only_once)
8222d8de
JMF
810
811 def report_error(self, message, tb=None):
812 '''
813 Do the same as trouble, but prefixes the message with 'ERROR:', colored
814 in red if stderr is a tty file.
815 '''
e9c0cdd3 816 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6febd1c1 817 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 818 else:
6febd1c1
PH
819 _msg_header = 'ERROR:'
820 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
821 self.trouble(error_message, tb)
822
b35496d8 823 def write_debug(self, message, only_once=False):
0760b0a7 824 '''Log debug message to the logger, or print it to stderr'''
825 if not self.params.get('verbose', False):
826 return
827 message = '[debug] %s' % message
828 if self.params.get('logger'):
829 self.params['logger'].debug(message)
830 else:
b35496d8 831 self.to_stderr(message, only_once)
0760b0a7 832
8222d8de
JMF
833 def report_file_already_downloaded(self, file_name):
834 """Report file has already been fully downloaded."""
835 try:
6febd1c1 836 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 837 except UnicodeEncodeError:
6febd1c1 838 self.to_screen('[download] The file has already been downloaded')
8222d8de 839
0c3d0f51 840 def report_file_delete(self, file_name):
841 """Report that existing file will be deleted."""
842 try:
c25228e5 843 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 844 except UnicodeEncodeError:
c25228e5 845 self.to_screen('Deleting existing file')
0c3d0f51 846
1151c407 847 def raise_no_formats(self, info, forced=False):
848 has_drm = info.get('__has_drm')
88acdbc2 849 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
850 expected = self.params.get('ignore_no_formats_error')
851 if forced or not expected:
1151c407 852 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
853 expected=has_drm or expected)
88acdbc2 854 else:
855 self.report_warning(msg)
856
de6000d9 857 def parse_outtmpl(self):
858 outtmpl_dict = self.params.get('outtmpl', {})
859 if not isinstance(outtmpl_dict, dict):
860 outtmpl_dict = {'default': outtmpl_dict}
861 outtmpl_dict.update({
862 k: v for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 863 if outtmpl_dict.get(k) is None})
de6000d9 864 for key, val in outtmpl_dict.items():
865 if isinstance(val, bytes):
866 self.report_warning(
867 'Parameter outtmpl is bytes, but should be a unicode string. '
868 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
869 return outtmpl_dict
870
21cd8fae 871 def get_output_path(self, dir_type='', filename=None):
872 paths = self.params.get('paths', {})
873 assert isinstance(paths, dict)
874 path = os.path.join(
875 expand_path(paths.get('home', '').strip()),
876 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
877 filename or '')
878
879 # Temporary fix for #4787
880 # 'Treat' all problem characters by passing filename through preferredencoding
881 # to workaround encoding issues with subprocess on python2 @ Windows
882 if sys.version_info < (3, 0) and sys.platform == 'win32':
883 path = encodeFilename(path, True).decode(preferredencoding())
884 return sanitize_path(path, force=self.params.get('windowsfilenames'))
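        # Rough illustration (the paths and filename are made-up examples):
        # with params['paths'] == {'home': '~/Videos', 'subtitle': 'subs'},
        # get_output_path('subtitle', 'clip.en.vtt') joins the expanded home
        # path, the 'subs' subdirectory and the filename, then sanitizes the
        # result for the current platform.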
885
76a264ac 886 @staticmethod
901130bb 887 def _outtmpl_expandpath(outtmpl):
888 # expand_path translates '%%' into '%' and '$$' into '$'
889 # correspondingly that is not what we want since we need to keep
890 # '%%' intact for template dict substitution step. Working around
891 # with boundary-alike separator hack.
892 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
893 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
894
895 # outtmpl should be expand_path'ed before template dict substitution
896 # because meta fields may contain env variables we don't want to
897 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
898 # title "Hello $PATH", we don't want `$PATH` to be expanded.
899 return expand_path(outtmpl).replace(sep, '')
900
901 @staticmethod
902 def escape_outtmpl(outtmpl):
903 ''' Escape any remaining strings like %s, %abc% etc. '''
904 return re.sub(
905 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
906 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
907 outtmpl)
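        # Roughly speaking (the example string is an assumption): a stray '%'
        # that is not part of a valid template field gets doubled, so
        # something like '100%bitrate %(title)s' becomes
        # '100%%bitrate %(title)s'.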
908
909 @classmethod
910 def validate_outtmpl(cls, outtmpl):
76a264ac 911 ''' @return None or Exception object '''
7d1eb38a 912 outtmpl = re.sub(
524e2e4f 913 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
7d1eb38a 914 lambda mobj: f'{mobj.group(0)[:-1]}s',
915 cls._outtmpl_expandpath(outtmpl))
76a264ac 916 try:
7d1eb38a 917 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 918 return None
919 except ValueError as err:
920 return err
921
143db31d 922 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
901130bb 923 """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
6e84b215 924 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 925
6e84b215 926 info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList
927 for key in ('__original_infodict', '__postprocessors'):
928 info_dict.pop(key, None)
752cda38 929 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 930 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 931 if info_dict.get('duration', None) is not None
932 else None)
752cda38 933 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
934 if info_dict.get('resolution') is None:
935 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 936
143db31d 937 # For fields playlist_index and autonumber convert all occurrences
938 # of %(field)s to %(field)0Nd for backward compatibility
939 field_size_compat_map = {
752cda38 940 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
941 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 942 }
752cda38 943
385a27fa 944 TMPL_DICT = {}
524e2e4f 945 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
385a27fa 946 MATH_FUNCTIONS = {
947 '+': float.__add__,
948 '-': float.__sub__,
949 }
e625be0d 950 # Field is of the form key1.key2...
951 # where keys (except first) can be string, int or slice
2b8a2973 952 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
385a27fa 953 MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
954 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 955 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
956 (?P<negate>-)?
385a27fa 957 (?P<fields>{field})
958 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 959 (?:>(?P<strf_format>.+?))?
7c37ff97 960 (?P<alternate>(?<!\\),[^|)]+)?
e625be0d 961 (?:\|(?P<default>.*?))?
385a27fa 962 $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
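        # Some template fields this regex is intended to parse (the field
        # names and values are illustrative assumptions):
        #   %(title)s                  -> plain field
        #   %(formats.0.height)s       -> dotted / indexed traversal
        #   %(duration>%H-%M-%S)s      -> strftime-style formatting
        #   %(epoch-3600>%H-%M-%S)s    -> maths, then strftime formatting
        #   %(artist,creator|Unknown)s -> alternate field, then a default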
752cda38 963
2b8a2973 964 def _traverse_infodict(k):
965 k = k.split('.')
966 if k[0] == '':
967 k.pop(0)
968 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 969
752cda38 970 def get_value(mdict):
971 # Object traversal
2b8a2973 972 value = _traverse_infodict(mdict['fields'])
752cda38 973 # Negative
974 if mdict['negate']:
975 value = float_or_none(value)
976 if value is not None:
977 value *= -1
978 # Do maths
385a27fa 979 offset_key = mdict['maths']
980 if offset_key:
752cda38 981 value = float_or_none(value)
982 operator = None
385a27fa 983 while offset_key:
984 item = re.match(
985 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
986 offset_key).group(0)
987 offset_key = offset_key[len(item):]
988 if operator is None:
752cda38 989 operator = MATH_FUNCTIONS[item]
385a27fa 990 continue
991 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
992 offset = float_or_none(item)
993 if offset is None:
2b8a2973 994 offset = float_or_none(_traverse_infodict(item))
385a27fa 995 try:
996 value = operator(value, multiplier * offset)
997 except (TypeError, ZeroDivisionError):
998 return None
999 operator = None
752cda38 1000 # Datetime formatting
1001 if mdict['strf_format']:
7c37ff97 1002 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1003
1004 return value
1005
b868936c 1006 na = self.params.get('outtmpl_na_placeholder', 'NA')
1007
6e84b215 1008 def _dumpjson_default(obj):
1009 if isinstance(obj, (set, LazyList)):
1010 return list(obj)
1011 raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1012
752cda38 1013 def create_key(outer_mobj):
1014 if not outer_mobj.group('has_key'):
901130bb 1015 return f'%{outer_mobj.group(0)}'
752cda38 1016 key = outer_mobj.group('key')
752cda38 1017 mobj = re.match(INTERNAL_FORMAT_RE, key)
7c37ff97 1018 initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1019 value, default = None, na
1020 while mobj:
e625be0d 1021 mobj = mobj.groupdict()
7c37ff97 1022 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1023 value = get_value(mobj)
7c37ff97 1024 if value is None and mobj['alternate']:
1025 mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1026 else:
1027 break
752cda38 1028
b868936c 1029 fmt = outer_mobj.group('format')
752cda38 1030 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1031 fmt = '0{:d}d'.format(field_size_compat_map[key])
1032
1033 value = default if value is None else value
752cda38 1034
7d1eb38a 1035 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1036 if fmt[-1] == 'l': # list
91dd88b9 1037 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1038 value, fmt = delim.join(variadic(value)), str_fmt
524e2e4f 1039 elif fmt[-1] == 'j': # json
6e84b215 1040 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
524e2e4f 1041 elif fmt[-1] == 'q': # quoted
7d1eb38a 1042 value, fmt = compat_shlex_quote(str(value)), str_fmt
524e2e4f 1043 elif fmt[-1] == 'B': # bytes
f5aa5cfb 1044 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1045 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1046 elif fmt[-1] == 'U': # unicode normalized
1047 opts = outer_mobj.group('conversion') or ''
1048 value, fmt = unicodedata.normalize(
1049 # "+" = compatibility equivalence, "#" = NFD
1050 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1051 value), str_fmt
7d1eb38a 1052 elif fmt[-1] == 'c':
524e2e4f 1053 if value:
1054 value = str(value)[0]
76a264ac 1055 else:
524e2e4f 1056 fmt = str_fmt
76a264ac 1057 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1058 value = float_or_none(value)
752cda38 1059 if value is None:
1060 value, fmt = default, 's'
901130bb 1061
752cda38 1062 if sanitize:
1063 if fmt[-1] == 'r':
1064 # If value is an object, sanitize might convert it to a string
1065 # So we convert it to repr first
7d1eb38a 1066 value, fmt = repr(value), str_fmt
639f1cea 1067 if fmt[-1] in 'csr':
7c37ff97 1068 value = sanitize(initial_field, value)
901130bb 1069
b868936c 1070 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1071 TMPL_DICT[key] = value
b868936c 1072 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1073
385a27fa 1074 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1075
de6000d9 1076 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1077 try:
586a91b6 1078 sanitize = lambda k, v: sanitize_filename(
45598aab 1079 compat_str(v),
1bb5c511 1080 restricted=self.params.get('restrictfilenames'),
40df485f 1081 is_id=(k == 'id' or k.endswith('_id')))
de6000d9 1082 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
143db31d 1083 outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
901130bb 1084 outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1085 filename = outtmpl % template_dict
15da37c7 1086
143db31d 1087 force_ext = OUTTMPL_TYPES.get(tmpl_type)
80c03fa9 1088 if filename and force_ext is not None:
752cda38 1089 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1090
bdc3fd2f
U
1091 # https://github.com/blackjack4494/youtube-dlc/issues/85
1092 trim_file_name = self.params.get('trim_file_name', False)
1093 if trim_file_name:
1094 fn_groups = filename.rsplit('.')
1095 ext = fn_groups[-1]
1096 sub_ext = ''
1097 if len(fn_groups) > 2:
1098 sub_ext = fn_groups[-2]
1099 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1100
0202b52a 1101 return filename
8222d8de 1102 except ValueError as err:
6febd1c1 1103 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1104 return None
1105
de6000d9 1106 def prepare_filename(self, info_dict, dir_type='', warn=False):
1107 """Generate the output filename."""
21cd8fae 1108
de6000d9 1109 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1110 if not filename and dir_type not in ('', 'temp'):
1111 return ''
de6000d9 1112
c84aeac6 1113 if warn:
21cd8fae 1114 if not self.params.get('paths'):
de6000d9 1115 pass
1116 elif filename == '-':
c84aeac6 1117 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1118 elif os.path.isabs(filename):
c84aeac6 1119 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1120 if filename == '-' or not filename:
1121 return filename
1122
21cd8fae 1123 return self.get_output_path(dir_type, filename)
0202b52a 1124
120fe513 1125 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1126 """ Returns None if the file should be downloaded """
8222d8de 1127
c77495e3 1128 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1129
8b0d7497 1130 def check_filter():
8b0d7497 1131 if 'title' in info_dict:
1132 # This can happen when we're just evaluating the playlist
1133 title = info_dict['title']
1134 matchtitle = self.params.get('matchtitle', False)
1135 if matchtitle:
1136 if not re.search(matchtitle, title, re.IGNORECASE):
1137 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1138 rejecttitle = self.params.get('rejecttitle', False)
1139 if rejecttitle:
1140 if re.search(rejecttitle, title, re.IGNORECASE):
1141 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1142 date = info_dict.get('upload_date')
1143 if date is not None:
1144 dateRange = self.params.get('daterange', DateRange())
1145 if date not in dateRange:
1146 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1147 view_count = info_dict.get('view_count')
1148 if view_count is not None:
1149 min_views = self.params.get('min_views')
1150 if min_views is not None and view_count < min_views:
1151 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1152 max_views = self.params.get('max_views')
1153 if max_views is not None and view_count > max_views:
1154 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1155 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1156 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1157
8f18aca8 1158 match_filter = self.params.get('match_filter')
1159 if match_filter is not None:
1160 try:
1161 ret = match_filter(info_dict, incomplete=incomplete)
1162 except TypeError:
1163 # For backward compatibility
1164 ret = None if incomplete else match_filter(info_dict)
1165 if ret is not None:
1166 return ret
8b0d7497 1167 return None
1168
c77495e3 1169 if self.in_download_archive(info_dict):
1170 reason = '%s has already been recorded in the archive' % video_title
1171 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1172 else:
1173 reason = check_filter()
1174 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1175 if reason is not None:
120fe513 1176 if not silent:
1177 self.to_screen('[download] ' + reason)
c77495e3 1178 if self.params.get(break_opt, False):
1179 raise break_err()
8b0d7497 1180 return reason
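        # Minimal sketch of a 'match_filter' callable as consumed above (the
        # view-count threshold is an arbitrary assumption): returning a string
        # skips the video with that message, returning None accepts it.
        #
        #   def my_filter(info_dict, incomplete=False):
        #       if (info_dict.get('view_count') or 0) < 100:
        #           return 'Skipping %s: too few views' % info_dict.get('id')
        #       return None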
fe7e0c98 1181
b6c45014
JMF
1182 @staticmethod
1183 def add_extra_info(info_dict, extra_info):
1184 '''Set the keys from extra_info in info dict if they are missing'''
1185 for key, value in extra_info.items():
1186 info_dict.setdefault(key, value)
1187
409e1828 1188 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1189 process=True, force_generic_extractor=False):
41d1cca3 1190 """
1191 Return a list with a dictionary for each video extracted.
1192
1193 Arguments:
1194 url -- URL to extract
1195
1196 Keyword arguments:
1197 download -- whether to download videos during extraction
1198 ie_key -- extractor key hint
1199 extra_info -- dictionary containing the extra values to add to each result
1200 process -- whether to resolve all unresolved references (URLs, playlist items),
1201 must be True for download to work.
1202 force_generic_extractor -- force using the generic extractor
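
        Example (illustrative; the URL is a placeholder):

            info = ydl.extract_info('https://example.com/watch/1234', download=False)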
1203 """
fe7e0c98 1204
409e1828 1205 if extra_info is None:
1206 extra_info = {}
1207
61aa5ba3 1208 if not ie_key and force_generic_extractor:
d22dec74
S
1209 ie_key = 'Generic'
1210
8222d8de 1211 if ie_key:
8b7491c8 1212 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1213 else:
1214 ies = self._ies
1215
8b7491c8 1216 for ie_key, ie in ies.items():
8222d8de
JMF
1217 if not ie.suitable(url):
1218 continue
1219
1220 if not ie.working():
6febd1c1
PH
1221 self.report_warning('The program functionality for this site has been marked as broken, '
1222 'and will probably not work.')
8222d8de 1223
1151c407 1224 temp_id = ie.get_temp_id(url)
a0566bbf 1225 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1226 self.to_screen("[%s] %s: has already been recorded in archive" % (
1227 ie_key, temp_id))
1228 break
8b7491c8 1229 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1230 else:
1231 self.report_error('no suitable InfoExtractor for URL %s' % url)
1232
8e5fecc8 1233 def __handle_extraction_exceptions(func):
1234
a0566bbf 1235 def wrapper(self, *args, **kwargs):
1236 try:
1237 return func(self, *args, **kwargs)
773f291d
S
1238 except GeoRestrictedError as e:
1239 msg = e.msg
1240 if e.countries:
1241 msg += '\nThis video is available in %s.' % ', '.join(
1242 map(ISO3166Utils.short2full, e.countries))
1243 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1244 self.report_error(msg)
fb043a6e 1245 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1246 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1247 except ThrottledDownload:
1248 self.to_stderr('\r')
1249 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1250 return wrapper(self, *args, **kwargs)
8e5fecc8 1251 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
d3e5bbf4 1252 raise
8222d8de 1253 except Exception as e:
b1940459 1254 if self.params.get('ignoreerrors'):
9b9c5355 1255 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1256 else:
1257 raise
a0566bbf 1258 return wrapper
1259
1260 @__handle_extraction_exceptions
58f197b7 1261 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1262 ie_result = ie.extract(url)
1263 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1264 return
1265 if isinstance(ie_result, list):
1266 # Backwards compatibility: old IE result format
1267 ie_result = {
1268 '_type': 'compat_list',
1269 'entries': ie_result,
1270 }
e37d0efb 1271 if extra_info.get('original_url'):
1272 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1273 self.add_default_extra_info(ie_result, ie, url)
1274 if process:
1275 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1276 else:
a0566bbf 1277 return ie_result
fe7e0c98 1278
ea38e55f 1279 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1280 if url is not None:
1281 self.add_extra_info(ie_result, {
1282 'webpage_url': url,
1283 'original_url': url,
1284 'webpage_url_basename': url_basename(url),
1285 })
1286 if ie is not None:
1287 self.add_extra_info(ie_result, {
1288 'extractor': ie.IE_NAME,
1289 'extractor_key': ie.ie_key(),
1290 })
ea38e55f 1291
58adec46 1292 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1293 """
1294 Take the result of the ie (which may be modified) and resolve all unresolved
1295 references (URLs, playlist items).
1296
1297 It will also download the videos if 'download'.
1298 Returns the resolved ie_result.
1299 """
58adec46 1300 if extra_info is None:
1301 extra_info = {}
e8ee972c
PH
1302 result_type = ie_result.get('_type', 'video')
1303
057a5206 1304 if result_type in ('url', 'url_transparent'):
134c6ea8 1305 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1306 if ie_result.get('original_url'):
1307 extra_info.setdefault('original_url', ie_result['original_url'])
1308
057a5206 1309 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1310 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1311 or extract_flat is True):
ecb54191 1312 info_copy = ie_result.copy()
6033d980 1313 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1314 if ie and not ie_result.get('id'):
4614bc22 1315 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1316 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1317 self.add_extra_info(info_copy, extra_info)
ecb54191 1318 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1319 if self.params.get('force_write_download_archive', False):
1320 self.record_download_archive(info_copy)
e8ee972c
PH
1321 return ie_result
1322
8222d8de 1323 if result_type == 'video':
b6c45014 1324 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1325 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1326 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1327 if additional_urls:
e9f4ccd1 1328 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1329 if isinstance(additional_urls, compat_str):
1330 additional_urls = [additional_urls]
1331 self.to_screen(
1332 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1333 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1334 ie_result['additional_entries'] = [
1335 self.extract_info(
1336 url, download, extra_info,
1337 force_generic_extractor=self.params.get('force_generic_extractor'))
1338 for url in additional_urls
1339 ]
1340 return ie_result
8222d8de
JMF
1341 elif result_type == 'url':
1342 # We have to add extra_info to the results because it may be
1343 # contained in a playlist
07cce701 1344 return self.extract_info(
1345 ie_result['url'], download,
1346 ie_key=ie_result.get('ie_key'),
1347 extra_info=extra_info)
7fc3fa05
PH
1348 elif result_type == 'url_transparent':
1349 # Use the information from the embedding page
1350 info = self.extract_info(
1351 ie_result['url'], ie_key=ie_result.get('ie_key'),
1352 extra_info=extra_info, download=False, process=False)
1353
1640eb09
S
1354 # extract_info may return None when ignoreerrors is enabled and
1355 # extraction failed with an error, don't crash and return early
1356 # in this case
1357 if not info:
1358 return info
1359
412c617d
PH
1360 force_properties = dict(
1361 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1362 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1363 if f in force_properties:
1364 del force_properties[f]
1365 new_result = info.copy()
1366 new_result.update(force_properties)
7fc3fa05 1367
0563f7ac
S
1368 # Extracted info may not be a video result (i.e.
1369 # info.get('_type', 'video') != 'video') but rather a 'url' or
1370 # url_transparent. In such cases outer metadata (from ie_result)
1371 # should be propagated to inner one (info). For this to happen
1372 # _type of info should be overridden with url_transparent. This
067aa17e 1373 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1374 if new_result.get('_type') == 'url':
1375 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1376
1377 return self.process_ie_result(
1378 new_result, download=download, extra_info=extra_info)
40fcba5e 1379 elif result_type in ('playlist', 'multi_video'):
30a074c2 1380 # Protect from infinite recursion due to recursively nested playlists
1381 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1382 webpage_url = ie_result['webpage_url']
1383 if webpage_url in self._playlist_urls:
7e85e872 1384 self.to_screen(
30a074c2 1385 '[download] Skipping already downloaded playlist: %s'
1386 % (ie_result.get('title') or ie_result.get('id')))
1387 return
7e85e872 1388
30a074c2 1389 self._playlist_level += 1
1390 self._playlist_urls.add(webpage_url)
bc516a3f 1391 self._sanitize_thumbnails(ie_result)
30a074c2 1392 try:
1393 return self.__process_playlist(ie_result, download)
1394 finally:
1395 self._playlist_level -= 1
1396 if not self._playlist_level:
1397 self._playlist_urls.clear()
8222d8de 1398 elif result_type == 'compat_list':
c9bf4114
PH
1399 self.report_warning(
1400 'Extractor %s returned a compat_list result. '
1401 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1402
8222d8de 1403 def _fixup(r):
b868936c 1404 self.add_extra_info(r, {
1405 'extractor': ie_result['extractor'],
1406 'webpage_url': ie_result['webpage_url'],
1407 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1408 'extractor_key': ie_result['extractor_key'],
1409 })
8222d8de
JMF
1410 return r
1411 ie_result['entries'] = [
b6c45014 1412 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1413 for r in ie_result['entries']
1414 ]
1415 return ie_result
1416 else:
1417 raise Exception('Invalid result type: %s' % result_type)
1418
e92caff5 1419 def _ensure_dir_exists(self, path):
1420 return make_dir(path, self.report_error)
1421
30a074c2 1422 def __process_playlist(self, ie_result, download):
1423 # We process each entry in the playlist
1424 playlist = ie_result.get('title') or ie_result.get('id')
1425 self.to_screen('[download] Downloading playlist: %s' % playlist)
1426
498f5606 1427 if 'entries' not in ie_result:
1428 raise EntryNotInPlaylist()
1429 incomplete_entries = bool(ie_result.get('requested_entries'))
1430 if incomplete_entries:
1431 def fill_missing_entries(entries, indexes):
1432 ret = [None] * max(indexes)
1433 for i, entry in zip(indexes, entries):
1434 ret[i - 1] = entry
1435 return ret
1436 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1437
30a074c2 1438 playlist_results = []
1439
56a8fb4f 1440 playliststart = self.params.get('playliststart', 1)
30a074c2 1441 playlistend = self.params.get('playlistend')
1442 # For backwards compatibility, interpret -1 as whole list
1443 if playlistend == -1:
1444 playlistend = None
1445
1446 playlistitems_str = self.params.get('playlist_items')
1447 playlistitems = None
1448 if playlistitems_str is not None:
1449 def iter_playlistitems(format):
1450 for string_segment in format.split(','):
1451 if '-' in string_segment:
1452 start, end = string_segment.split('-')
1453 for item in range(int(start), int(end) + 1):
1454 yield int(item)
1455 else:
1456 yield int(string_segment)
1457 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
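# Illustrative example (assumption, not from the original source):
#     >>> list(iter_playlistitems('1,3,5-7'))
#     [1, 3, 5, 6, 7]
# orderedSet() then de-duplicates while preserving the given order, so the
# resulting playlistitems control both which entries are picked and their order.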
1458
1459 ie_entries = ie_result['entries']
56a8fb4f 1460 msg = (
1461 'Downloading %d videos' if not isinstance(ie_entries, list)
1462 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1463
1464 if isinstance(ie_entries, list):
1465 def get_entry(i):
1466 return ie_entries[i - 1]
1467 else:
1468 if not isinstance(ie_entries, PagedList):
1469 ie_entries = LazyList(ie_entries)
1470
1471 def get_entry(i):
1472 return YoutubeDL.__handle_extraction_exceptions(
1473 lambda self, i: ie_entries[i - 1]
1474 )(self, i)
50fed816 1475
56a8fb4f 1476 entries = []
ff1c7fc9 1477 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1478 for i in items:
1479 if i == 0:
1480 continue
56a8fb4f 1481 if playlistitems is None and playlistend is not None and playlistend < i:
1482 break
1483 entry = None
1484 try:
50fed816 1485 entry = get_entry(i)
56a8fb4f 1486 if entry is None:
498f5606 1487 raise EntryNotInPlaylist()
56a8fb4f 1488 except (IndexError, EntryNotInPlaylist):
1489 if incomplete_entries:
1490 raise EntryNotInPlaylist()
1491 elif not playlistitems:
1492 break
1493 entries.append(entry)
120fe513 1494 try:
1495 if entry is not None:
1496 self._match_entry(entry, incomplete=True, silent=True)
1497 except (ExistingVideoReached, RejectedVideoReached):
1498 break
56a8fb4f 1499 ie_result['entries'] = entries
30a074c2 1500
56a8fb4f 1501 # Save playlist_index before re-ordering
1502 entries = [
9e598870 1503 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1504 for i, entry in enumerate(entries, 1)
1505 if entry is not None]
1506 n_entries = len(entries)
498f5606 1507
498f5606 1508 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1509 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1510 ie_result['requested_entries'] = playlistitems
1511
1512 if self.params.get('allow_playlist_files', True):
1513 ie_copy = {
1514 'playlist': playlist,
1515 'playlist_id': ie_result.get('id'),
1516 'playlist_title': ie_result.get('title'),
1517 'playlist_uploader': ie_result.get('uploader'),
1518 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1519 'playlist_index': 0,
498f5606 1520 }
1521 ie_copy.update(dict(ie_result))
1522
80c03fa9 1523 if self._write_info_json('playlist', ie_result,
1524 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1525 return
1526 if self._write_description('playlist', ie_result,
1527 self.prepare_filename(ie_copy, 'pl_description')) is None:
1528 return
681de68e 1529 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1530 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1531
1532 if self.params.get('playlistreverse', False):
1533 entries = entries[::-1]
30a074c2 1534 if self.params.get('playlistrandom', False):
1535 random.shuffle(entries)
1536
1537 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1538
56a8fb4f 1539 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1540 failures = 0
1541 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1542 for i, entry_tuple in enumerate(entries, 1):
1543 playlist_index, entry = entry_tuple
81139999 1544 if 'playlist-index' in self.params.get('compat_opts', []):
1545 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1546 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1547 # This __x_forwarded_for_ip thing is a bit ugly but requires
1548 # minimal changes
1549 if x_forwarded_for:
1550 entry['__x_forwarded_for_ip'] = x_forwarded_for
1551 extra = {
1552 'n_entries': n_entries,
f59ae581 1553 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1554 'playlist_index': playlist_index,
1555 'playlist_autonumber': i,
30a074c2 1556 'playlist': playlist,
1557 'playlist_id': ie_result.get('id'),
1558 'playlist_title': ie_result.get('title'),
1559 'playlist_uploader': ie_result.get('uploader'),
1560 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1561 'extractor': ie_result['extractor'],
1562 'webpage_url': ie_result['webpage_url'],
1563 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1564 'extractor_key': ie_result['extractor_key'],
1565 }
1566
1567 if self._match_entry(entry, incomplete=True) is not None:
1568 continue
1569
1570 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1571 if not entry_result:
1572 failures += 1
1573 if failures >= max_failures:
1574 self.report_error(
1575 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1576 break
30a074c2 1577 # TODO: skip failed (empty) entries?
1578 playlist_results.append(entry_result)
1579 ie_result['entries'] = playlist_results
1580 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1581 return ie_result
1582
a0566bbf 1583 @__handle_extraction_exceptions
1584 def __process_iterable_entry(self, entry, download, extra_info):
1585 return self.process_ie_result(
1586 entry, download=download, extra_info=extra_info)
1587
67134eab
JMF
1588 def _build_format_filter(self, filter_spec):
1589 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1590
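# Illustrative filter_spec examples (assumptions, not from the original source):
#     'height<=720'    -> keep formats whose height is at most 720
#     'height<=?720'   -> same, but also keep formats with unknown height
#     'filesize>100M'  -> keep formats reported larger than 100MB (via parse_filesize)
#     'vcodec^=avc1'   -> keep formats whose vcodec starts with 'avc1'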
1591 OPERATORS = {
1592 '<': operator.lt,
1593 '<=': operator.le,
1594 '>': operator.gt,
1595 '>=': operator.ge,
1596 '=': operator.eq,
1597 '!=': operator.ne,
1598 }
67134eab 1599 operator_rex = re.compile(r'''(?x)\s*
187986a8 1600 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1601 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1602 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1603 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1604 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1605 if m:
1606 try:
1607 comparison_value = int(m.group('value'))
1608 except ValueError:
1609 comparison_value = parse_filesize(m.group('value'))
1610 if comparison_value is None:
1611 comparison_value = parse_filesize(m.group('value') + 'B')
1612 if comparison_value is None:
1613 raise ValueError(
1614 'Invalid value %r in format specification %r' % (
67134eab 1615 m.group('value'), filter_spec))
9ddb6925
S
1616 op = OPERATORS[m.group('op')]
1617
083c9df9 1618 if not m:
9ddb6925
S
1619 STR_OPERATORS = {
1620 '=': operator.eq,
10d33b34
YCH
1621 '^=': lambda attr, value: attr.startswith(value),
1622 '$=': lambda attr, value: attr.endswith(value),
1623 '*=': lambda attr, value: value in attr,
9ddb6925 1624 }
187986a8 1625 str_operator_rex = re.compile(r'''(?x)\s*
1626 (?P<key>[a-zA-Z0-9._-]+)\s*
1627 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1628 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1629 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1630 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1631 if m:
1632 comparison_value = m.group('value')
2cc779f4
S
1633 str_op = STR_OPERATORS[m.group('op')]
1634 if m.group('negation'):
e118a879 1635 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1636 else:
1637 op = str_op
083c9df9 1638
9ddb6925 1639 if not m:
187986a8 1640 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1641
1642 def _filter(f):
1643 actual_value = f.get(m.group('key'))
1644 if actual_value is None:
1645 return m.group('none_inclusive')
1646 return op(actual_value, comparison_value)
67134eab
JMF
1647 return _filter
1648
0017d9ad 1649 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1650
af0f7428
S
1651 def can_merge():
1652 merger = FFmpegMergerPP(self)
1653 return merger.available and merger.can_merge()
1654
91ebc640 1655 prefer_best = (
b7b04c78 1656 not self.params.get('simulate')
91ebc640 1657 and download
1658 and (
1659 not can_merge()
19807826 1660 or info_dict.get('is_live', False)
de6000d9 1661 or self.outtmpl_dict['default'] == '-'))
53ed7066 1662 compat = (
1663 prefer_best
1664 or self.params.get('allow_multiple_audio_streams', False)
1665 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1666
1667 return (
53ed7066 1668 'best/bestvideo+bestaudio' if prefer_best
1669 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1670 else 'bestvideo+bestaudio/best')
0017d9ad 1671
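# Illustrative outcomes (assumption, not from the original source):
#   - downloading to stdout ('-'), live streams, or no usable ffmpeg merger -> 'best/bestvideo+bestaudio'
#   - normal download with merging available and no compat options          -> 'bestvideo*+bestaudio/best'
#   - 'format-spec' in compat_opts (youtube-dl behaviour)                    -> 'bestvideo+bestaudio/best'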
67134eab
JMF
1672 def build_format_selector(self, format_spec):
1673 def syntax_error(note, start):
1674 message = (
1675 'Invalid format specification: '
1676 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1677 return SyntaxError(message)
1678
1679 PICKFIRST = 'PICKFIRST'
1680 MERGE = 'MERGE'
1681 SINGLE = 'SINGLE'
0130afb7 1682 GROUP = 'GROUP'
67134eab
JMF
1683 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1684
91ebc640 1685 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1686 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1687
e8e73840 1688 check_formats = self.params.get('check_formats')
1689
67134eab
JMF
1690 def _parse_filter(tokens):
1691 filter_parts = []
1692 for type, string, start, _, _ in tokens:
1693 if type == tokenize.OP and string == ']':
1694 return ''.join(filter_parts)
1695 else:
1696 filter_parts.append(string)
1697
232541df 1698 def _remove_unused_ops(tokens):
17cc1534 1699 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1700 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1701 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1702 last_string, last_start, last_end, last_line = None, None, None, None
1703 for type, string, start, end, line in tokens:
1704 if type == tokenize.OP and string == '[':
1705 if last_string:
1706 yield tokenize.NAME, last_string, last_start, last_end, last_line
1707 last_string = None
1708 yield type, string, start, end, line
1709 # everything inside brackets will be handled by _parse_filter
1710 for type, string, start, end, line in tokens:
1711 yield type, string, start, end, line
1712 if type == tokenize.OP and string == ']':
1713 break
1714 elif type == tokenize.OP and string in ALLOWED_OPS:
1715 if last_string:
1716 yield tokenize.NAME, last_string, last_start, last_end, last_line
1717 last_string = None
1718 yield type, string, start, end, line
1719 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1720 if not last_string:
1721 last_string = string
1722 last_start = start
1723 last_end = end
1724 else:
1725 last_string += string
1726 if last_string:
1727 yield tokenize.NAME, last_string, last_start, last_end, last_line
1728
cf2ac6df 1729 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1730 selectors = []
1731 current_selector = None
1732 for type, string, start, _, _ in tokens:
1733 # ENCODING is only defined in python 3.x
1734 if type == getattr(tokenize, 'ENCODING', None):
1735 continue
1736 elif type in [tokenize.NAME, tokenize.NUMBER]:
1737 current_selector = FormatSelector(SINGLE, string, [])
1738 elif type == tokenize.OP:
cf2ac6df
JMF
1739 if string == ')':
1740 if not inside_group:
1741 # ')' will be handled by the parentheses group
1742 tokens.restore_last_token()
67134eab 1743 break
cf2ac6df 1744 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1745 tokens.restore_last_token()
1746 break
cf2ac6df
JMF
1747 elif inside_choice and string == ',':
1748 tokens.restore_last_token()
1749 break
1750 elif string == ',':
0a31a350
JMF
1751 if not current_selector:
1752 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1753 selectors.append(current_selector)
1754 current_selector = None
1755 elif string == '/':
d96d604e
JMF
1756 if not current_selector:
1757 raise syntax_error('"/" must follow a format selector', start)
67134eab 1758 first_choice = current_selector
cf2ac6df 1759 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1760 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1761 elif string == '[':
1762 if not current_selector:
1763 current_selector = FormatSelector(SINGLE, 'best', [])
1764 format_filter = _parse_filter(tokens)
1765 current_selector.filters.append(format_filter)
0130afb7
JMF
1766 elif string == '(':
1767 if current_selector:
1768 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1769 group = _parse_format_selection(tokens, inside_group=True)
1770 current_selector = FormatSelector(GROUP, group, [])
67134eab 1771 elif string == '+':
d03cfdce 1772 if not current_selector:
1773 raise syntax_error('Unexpected "+"', start)
1774 selector_1 = current_selector
1775 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1776 if not selector_2:
1777 raise syntax_error('Expected a selector', start)
1778 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1779 else:
1780 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1781 elif type == tokenize.ENDMARKER:
1782 break
1783 if current_selector:
1784 selectors.append(current_selector)
1785 return selectors
1786
f8d4ad9a 1787 def _merge(formats_pair):
1788 format_1, format_2 = formats_pair
1789
1790 formats_info = []
1791 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1792 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1793
1794 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1795 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1796 for (i, fmt_info) in enumerate(formats_info):
551f9388 1797 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1798 formats_info.pop(i)
1799 continue
1800 for aud_vid in ['audio', 'video']:
f8d4ad9a 1801 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1802 if get_no_more[aud_vid]:
1803 formats_info.pop(i)
f5510afe 1804 break
f8d4ad9a 1805 get_no_more[aud_vid] = True
1806
1807 if len(formats_info) == 1:
1808 return formats_info[0]
1809
1810 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1811 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1812
1813 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1814 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1815
1816 output_ext = self.params.get('merge_output_format')
1817 if not output_ext:
1818 if the_only_video:
1819 output_ext = the_only_video['ext']
1820 elif the_only_audio and not video_fmts:
1821 output_ext = the_only_audio['ext']
1822 else:
1823 output_ext = 'mkv'
1824
1825 new_dict = {
1826 'requested_formats': formats_info,
1827 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1828 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1829 'ext': output_ext,
1830 }
1831
1832 if the_only_video:
1833 new_dict.update({
1834 'width': the_only_video.get('width'),
1835 'height': the_only_video.get('height'),
1836 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1837 'fps': the_only_video.get('fps'),
1838 'vcodec': the_only_video.get('vcodec'),
1839 'vbr': the_only_video.get('vbr'),
1840 'stretched_ratio': the_only_video.get('stretched_ratio'),
1841 })
1842
1843 if the_only_audio:
1844 new_dict.update({
1845 'acodec': the_only_audio.get('acodec'),
1846 'abr': the_only_audio.get('abr'),
1847 })
1848
1849 return new_dict
1850
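# Illustrative example (assumption, not from the original source): merging a
# video-only and an audio-only format yields a synthetic dict along the lines of
#     {'requested_formats': [vfmt, afmt], 'format_id': '137+140', 'ext': 'mp4', ...}
# where 'ext' is the single video's ext, the single audio's ext when there is
# no video, or 'mkv' otherwise (e.g. several video streams were merged).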
e8e73840 1851 def _check_formats(formats):
981052c9 1852 if not check_formats:
1853 yield from formats
b5ac45b1 1854 return
e8e73840 1855 for f in formats:
1856 self.to_screen('[info] Testing format %s' % f['format_id'])
21cd8fae 1857 temp_file = tempfile.NamedTemporaryFile(
1858 suffix='.tmp', delete=False,
1859 dir=self.get_output_path('temp') or None)
1860 temp_file.close()
fe346461 1861 try:
981052c9 1862 success, _ = self.dl(temp_file.name, f, test=True)
1863 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1864 success = False
fe346461 1865 finally:
21cd8fae 1866 if os.path.exists(temp_file.name):
1867 try:
1868 os.remove(temp_file.name)
1869 except OSError:
1870 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
981052c9 1871 if success:
e8e73840 1872 yield f
1873 else:
1874 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1875
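# Illustrative usage (assumption, not from the original source):
#     >>> ydl = YoutubeDL({'check_formats': True, 'format': 'bv*+ba/b'})
# With check_formats set, each candidate format is probed with a tiny test
# download into a temp file (self.dl(..., test=True)) and formats that fail the
# probe are reported and dropped before selection.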
67134eab 1876 def _build_selector_function(selector):
909d24dd 1877 if isinstance(selector, list): # ,
67134eab
JMF
1878 fs = [_build_selector_function(s) for s in selector]
1879
317f7ab6 1880 def selector_function(ctx):
67134eab 1881 for f in fs:
981052c9 1882 yield from f(ctx)
67134eab 1883 return selector_function
909d24dd 1884
1885 elif selector.type == GROUP: # ()
0130afb7 1886 selector_function = _build_selector_function(selector.selector)
909d24dd 1887
1888 elif selector.type == PICKFIRST: # /
67134eab
JMF
1889 fs = [_build_selector_function(s) for s in selector.selector]
1890
317f7ab6 1891 def selector_function(ctx):
67134eab 1892 for f in fs:
317f7ab6 1893 picked_formats = list(f(ctx))
67134eab
JMF
1894 if picked_formats:
1895 return picked_formats
1896 return []
67134eab 1897
981052c9 1898 elif selector.type == MERGE: # +
1899 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1900
1901 def selector_function(ctx):
1902 for pair in itertools.product(
1903 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1904 yield _merge(pair)
1905
909d24dd 1906 elif selector.type == SINGLE: # atom
598d185d 1907 format_spec = selector.selector or 'best'
909d24dd 1908
f8d4ad9a 1909 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1910 if format_spec == 'all':
1911 def selector_function(ctx):
981052c9 1912 yield from _check_formats(ctx['formats'])
f8d4ad9a 1913 elif format_spec == 'mergeall':
1914 def selector_function(ctx):
981052c9 1915 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1916 if not formats:
1917 return
921b76ca 1918 merged_format = formats[-1]
1919 for f in formats[-2::-1]:
f8d4ad9a 1920 merged_format = _merge((merged_format, f))
1921 yield merged_format
909d24dd 1922
1923 else:
e8e73840 1924 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1925 mobj = re.match(
1926 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1927 format_spec)
1928 if mobj is not None:
1929 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1930 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1931 format_type = (mobj.group('type') or [None])[0]
1932 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1933 format_modified = mobj.group('mod') is not None
909d24dd 1934
1935 format_fallback = not format_type and not format_modified # for b, w
8326b00a 1936 _filter_f = (
eff63539 1937 (lambda f: f.get('%scodec' % format_type) != 'none')
1938 if format_type and format_modified # bv*, ba*, wv*, wa*
1939 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1940 if format_type # bv, ba, wv, wa
1941 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1942 if not format_modified # b, w
8326b00a 1943 else lambda f: True) # b*, w*
1944 filter_f = lambda f: _filter_f(f) and (
1945 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 1946 else:
909d24dd 1947 filter_f = ((lambda f: f.get('ext') == format_spec)
1948 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1949 else (lambda f: f.get('format_id') == format_spec)) # id
1950
1951 def selector_function(ctx):
1952 formats = list(ctx['formats'])
909d24dd 1953 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1954 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1955 # for extractors with incomplete formats (audio only (soundcloud)
1956 # or video only (imgur)) best/worst will fall back to
1957 # best/worst {video,audio}-only format
e8e73840 1958 matches = formats
981052c9 1959 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1960 try:
e8e73840 1961 yield matches[format_idx - 1]
981052c9 1962 except IndexError:
1963 return
083c9df9 1964
67134eab 1965 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1966
317f7ab6
S
1967 def final_selector(ctx):
1968 ctx_copy = copy.deepcopy(ctx)
67134eab 1969 for _filter in filters:
317f7ab6
S
1970 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1971 return selector_function(ctx_copy)
67134eab 1972 return final_selector
083c9df9 1973
67134eab 1974 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1975 try:
232541df 1976 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1977 except tokenize.TokenError:
1978 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1979
1980 class TokenIterator(object):
1981 def __init__(self, tokens):
1982 self.tokens = tokens
1983 self.counter = 0
1984
1985 def __iter__(self):
1986 return self
1987
1988 def __next__(self):
1989 if self.counter >= len(self.tokens):
1990 raise StopIteration()
1991 value = self.tokens[self.counter]
1992 self.counter += 1
1993 return value
1994
1995 next = __next__
1996
1997 def restore_last_token(self):
1998 self.counter -= 1
1999
2000 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2001 return _build_selector_function(parsed_selector)
a9c58ad9 2002
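# Illustrative sketch (assumption, not from the original source) of how a spec
# decomposes into the selector tree built above:
#     'bestvideo[height<=720]+bestaudio/best'
#         -> PICKFIRST(MERGE(SINGLE 'bestvideo' [height<=720], SINGLE 'bestaudio'),
#                      SINGLE 'best')
# The returned function takes a ctx dict such as
#     {'formats': [...], 'incomplete_formats': False}
# and yields the chosen format dicts.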
e5660ee6
JMF
2003 def _calc_headers(self, info_dict):
2004 res = std_headers.copy()
2005
2006 add_headers = info_dict.get('http_headers')
2007 if add_headers:
2008 res.update(add_headers)
2009
2010 cookies = self._calc_cookies(info_dict)
2011 if cookies:
2012 res['Cookie'] = cookies
2013
0016b84e
S
2014 if 'X-Forwarded-For' not in res:
2015 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2016 if x_forwarded_for_ip:
2017 res['X-Forwarded-For'] = x_forwarded_for_ip
2018
e5660ee6
JMF
2019 return res
2020
2021 def _calc_cookies(self, info_dict):
5c2266df 2022 pr = sanitized_Request(info_dict['url'])
e5660ee6 2023 self.cookiejar.add_cookie_header(pr)
662435f7 2024 return pr.get_header('Cookie')
e5660ee6 2025
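# Illustrative result (assumption, not from the original source): _calc_headers
# merges std_headers with the format's own 'http_headers', a 'Cookie' header
# built from the cookiejar, and any per-request '__x_forwarded_for_ip', e.g.
#     {'User-Agent': '...', 'Accept': '...', 'Cookie': 'sid=abc',
#      'X-Forwarded-For': '203.0.113.7'}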
b0249bca 2026 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2027 thumbnails = info_dict.get('thumbnails')
2028 if thumbnails is None:
2029 thumbnail = info_dict.get('thumbnail')
2030 if thumbnail:
2031 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2032 if thumbnails:
2033 thumbnails.sort(key=lambda t: (
2034 t.get('preference') if t.get('preference') is not None else -1,
2035 t.get('width') if t.get('width') is not None else -1,
2036 t.get('height') if t.get('height') is not None else -1,
2037 t.get('id') if t.get('id') is not None else '',
2038 t.get('url')))
b0249bca 2039
0ba692ac 2040 def thumbnail_tester():
2041 if self.params.get('check_formats'):
cca80fe6 2042 test_all = True
2043 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 2044 else:
cca80fe6 2045 test_all = False
0ba692ac 2046 to_screen = self.write_debug
2047
2048 def test_thumbnail(t):
cca80fe6 2049 if not test_all and not t.get('_test_url'):
2050 return True
0ba692ac 2051 to_screen('Testing thumbnail %s' % t['id'])
2052 try:
2053 self.urlopen(HEADRequest(t['url']))
2054 except network_exceptions as err:
2055 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2056 t['id'], t['url'], error_to_compat_str(err)))
2057 return False
2058 return True
2059
2060 return test_thumbnail
b0249bca 2061
bc516a3f 2062 for i, t in enumerate(thumbnails):
bc516a3f 2063 if t.get('id') is None:
2064 t['id'] = '%d' % i
b0249bca 2065 if t.get('width') and t.get('height'):
2066 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2067 t['url'] = sanitize_url(t['url'])
0ba692ac 2068
2069 if self.params.get('check_formats') is not False:
2070 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2071 else:
2072 info_dict['thumbnails'] = thumbnails
bc516a3f 2073
dd82ffea
JMF
2074 def process_video_result(self, info_dict, download=True):
2075 assert info_dict.get('_type', 'video') == 'video'
2076
bec1fad2
PH
2077 if 'id' not in info_dict:
2078 raise ExtractorError('Missing "id" field in extractor result')
2079 if 'title' not in info_dict:
1151c407 2080 raise ExtractorError('Missing "title" field in extractor result',
2081 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2082
c9969434
S
2083 def report_force_conversion(field, field_not, conversion):
2084 self.report_warning(
2085 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2086 % (field, field_not, conversion))
2087
2088 def sanitize_string_field(info, string_field):
2089 field = info.get(string_field)
2090 if field is None or isinstance(field, compat_str):
2091 return
2092 report_force_conversion(string_field, 'a string', 'string')
2093 info[string_field] = compat_str(field)
2094
2095 def sanitize_numeric_fields(info):
2096 for numeric_field in self._NUMERIC_FIELDS:
2097 field = info.get(numeric_field)
2098 if field is None or isinstance(field, compat_numeric_types):
2099 continue
2100 report_force_conversion(numeric_field, 'numeric', 'int')
2101 info[numeric_field] = int_or_none(field)
2102
2103 sanitize_string_field(info_dict, 'id')
2104 sanitize_numeric_fields(info_dict)
be6217b2 2105
dd82ffea
JMF
2106 if 'playlist' not in info_dict:
2107 # It isn't part of a playlist
2108 info_dict['playlist'] = None
2109 info_dict['playlist_index'] = None
2110
bc516a3f 2111 self._sanitize_thumbnails(info_dict)
d5519808 2112
536a55da 2113 thumbnail = info_dict.get('thumbnail')
bc516a3f 2114 thumbnails = info_dict.get('thumbnails')
536a55da
S
2115 if thumbnail:
2116 info_dict['thumbnail'] = sanitize_url(thumbnail)
2117 elif thumbnails:
d5519808
PH
2118 info_dict['thumbnail'] = thumbnails[-1]['url']
2119
ae30b840 2120 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2121 info_dict['display_id'] = info_dict['id']
2122
10db0d2f 2123 for ts_key, date_key in (
2124 ('timestamp', 'upload_date'),
2125 ('release_timestamp', 'release_date'),
2126 ):
2127 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2128 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2129 # see http://bugs.python.org/issue1646728)
2130 try:
2131 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2132 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2133 except (ValueError, OverflowError, OSError):
2134 pass
9d2ecdbc 2135
ae30b840 2136 live_keys = ('is_live', 'was_live')
2137 live_status = info_dict.get('live_status')
2138 if live_status is None:
2139 for key in live_keys:
2140 if info_dict.get(key) is False:
2141 continue
2142 if info_dict.get(key):
2143 live_status = key
2144 break
2145 if all(info_dict.get(key) is False for key in live_keys):
2146 live_status = 'not_live'
2147 if live_status:
2148 info_dict['live_status'] = live_status
2149 for key in live_keys:
2150 if info_dict.get(key) is None:
2151 info_dict[key] = (live_status == key)
2152
33d2fc2f
S
2153 # Auto generate title fields corresponding to the *_number fields when missing
2154 # in order to always have clean titles. This is very common for TV series.
2155 for field in ('chapter', 'season', 'episode'):
2156 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2157 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2158
05108a49
S
2159 for cc_kind in ('subtitles', 'automatic_captions'):
2160 cc = info_dict.get(cc_kind)
2161 if cc:
2162 for _, subtitle in cc.items():
2163 for subtitle_format in subtitle:
2164 if subtitle_format.get('url'):
2165 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2166 if subtitle_format.get('ext') is None:
2167 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2168
2169 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2170 subtitles = info_dict.get('subtitles')
4bba3716 2171
360e1ca5 2172 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2173 info_dict['id'], subtitles, automatic_captions)
a504ced0 2174
dd82ffea
JMF
2175 # We now pick which formats have to be downloaded
2176 if info_dict.get('formats') is None:
2177 # There's only one format available
2178 formats = [info_dict]
2179 else:
2180 formats = info_dict['formats']
2181
e0493e90 2182 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2183 if not self.params.get('allow_unplayable_formats'):
2184 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2185
db95dc13 2186 if not formats:
1151c407 2187 self.raise_no_formats(info_dict)
db95dc13 2188
73af5cc8
S
2189 def is_wellformed(f):
2190 url = f.get('url')
a5ac0c47 2191 if not url:
73af5cc8
S
2192 self.report_warning(
2193 '"url" field is missing or empty - skipping format, '
2194 'there is an error in extractor')
a5ac0c47
S
2195 return False
2196 if isinstance(url, bytes):
2197 sanitize_string_field(f, 'url')
2198 return True
73af5cc8
S
2199
2200 # Filter out malformed formats for better extraction robustness
2201 formats = list(filter(is_wellformed, formats))
2202
181c7053
S
2203 formats_dict = {}
2204
dd82ffea 2205 # We check that all the formats have the format and format_id fields
db95dc13 2206 for i, format in enumerate(formats):
c9969434
S
2207 sanitize_string_field(format, 'format_id')
2208 sanitize_numeric_fields(format)
dcf77cf1 2209 format['url'] = sanitize_url(format['url'])
e74e3b63 2210 if not format.get('format_id'):
8016c922 2211 format['format_id'] = compat_str(i)
e2effb08
S
2212 else:
2213 # Sanitize format_id from characters used in format selector expression
ec85ded8 2214 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2215 format_id = format['format_id']
2216 if format_id not in formats_dict:
2217 formats_dict[format_id] = []
2218 formats_dict[format_id].append(format)
2219
2220 # Make sure all formats have unique format_id
2221 for format_id, ambiguous_formats in formats_dict.items():
2222 if len(ambiguous_formats) > 1:
2223 for i, format in enumerate(ambiguous_formats):
2224 format['format_id'] = '%s-%d' % (format_id, i)
2225
2226 for i, format in enumerate(formats):
8c51aa65 2227 if format.get('format') is None:
6febd1c1 2228 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2229 id=format['format_id'],
2230 res=self.format_resolution(format),
b868936c 2231 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2232 )
c1002e96 2233 # Automatically determine file extension if missing
5b1d8575 2234 if format.get('ext') is None:
cce929ea 2235 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2236 # Automatically determine protocol if missing (useful for format
2237 # selection purposes)
6f0be937 2238 if format.get('protocol') is None:
b5559424 2239 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2240 # Add HTTP headers, so that external programs can use them from the
2241 # json output
2242 full_format_info = info_dict.copy()
2243 full_format_info.update(format)
2244 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2245 # Remove private housekeeping stuff
2246 if '__x_forwarded_for_ip' in info_dict:
2247 del info_dict['__x_forwarded_for_ip']
dd82ffea 2248
4bcc7bd1 2249 # TODO Central sorting goes here
99e206d5 2250
88acdbc2 2251 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2252 # only set the 'formats' field if the original info_dict lists them;
2253 # otherwise we end up with a circular reference: the first (and unique)
f89197d7 2254 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2255 # which can't be exported to json
b3d9ef88 2256 info_dict['formats'] = formats
4ec82a72 2257
2258 info_dict, _ = self.pre_process(info_dict)
2259
b7b04c78 2260 if self.params.get('list_thumbnails'):
2261 self.list_thumbnails(info_dict)
2262 if self.params.get('listformats'):
86c66b2d 2263 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2264 self.to_screen('%s has no formats' % info_dict['id'])
2265 else:
2266 self.list_formats(info_dict)
b7b04c78 2267 if self.params.get('listsubtitles'):
2268 if 'automatic_captions' in info_dict:
2269 self.list_subtitles(
2270 info_dict['id'], automatic_captions, 'automatic captions')
2271 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2272 list_only = self.params.get('simulate') is None and (
2273 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2274 if list_only:
b7b04c78 2275 # Without this printing, -F --print-json will not work
169dbde9 2276 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2277 return
2278
187986a8 2279 format_selector = self.format_selector
2280 if format_selector is None:
0017d9ad 2281 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2282 self.write_debug('Default format spec: %s' % req_format)
187986a8 2283 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2284
2285 # While in format selection we may need to have access to the original
2286 # format set in order to calculate some metrics or do some processing.
2287 # For now we need to be able to guess whether the original formats provided
2288 # by the extractor are incomplete or not (i.e. whether the extractor provides only
2289 # video-only or audio-only formats) for proper format selection for
2290 # extractors with such incomplete formats (see
067aa17e 2291 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2292 # Since formats may be filtered during format selection and may not match
2293 # the original formats, the results may be incorrect. Thus original formats
2294 # or pre-calculated metrics should be passed to format selection routines
2295 # as well.
2296 # We will pass a context object containing all necessary additional data
2297 # instead of just formats.
2298 # This fixes incorrect format selection issue (see
067aa17e 2299 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2300 incomplete_formats = (
317f7ab6 2301 # All formats are video-only or
3089bc74 2302 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2303 # all formats are audio-only
3089bc74 2304 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2305
2306 ctx = {
2307 'formats': formats,
2308 'incomplete_formats': incomplete_formats,
2309 }
2310
2311 formats_to_download = list(format_selector(ctx))
dd82ffea 2312 if not formats_to_download:
b7da73eb 2313 if not self.params.get('ignore_no_formats_error'):
1151c407 2314 raise ExtractorError('Requested format is not available', expected=True,
2315 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2316 else:
2317 self.report_warning('Requested format is not available')
4513a41a
A
2318 # Process what we can, even without any available formats.
2319 self.process_info(dict(info_dict))
b7da73eb 2320 elif download:
2321 self.to_screen(
07cce701 2322 '[info] %s: Downloading %d format(s): %s' % (
2323 info_dict['id'], len(formats_to_download),
2324 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2325 for fmt in formats_to_download:
dd82ffea 2326 new_info = dict(info_dict)
4ec82a72 2327 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2328 new_info['__original_infodict'] = info_dict
b7da73eb 2329 new_info.update(fmt)
dd82ffea
JMF
2330 self.process_info(new_info)
2331 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2332 if formats_to_download:
2333 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2334 return info_dict
2335
98c70d6f 2336 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2337 """Select the requested subtitles and their format"""
98c70d6f
JMF
2338 available_subs = {}
2339 if normal_subtitles and self.params.get('writesubtitles'):
2340 available_subs.update(normal_subtitles)
2341 if automatic_captions and self.params.get('writeautomaticsub'):
2342 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2343 if lang not in available_subs:
2344 available_subs[lang] = cap_info
2345
4d171848
JMF
2346 if (not self.params.get('writesubtitles') and not
2347 self.params.get('writeautomaticsub') or not
2348 available_subs):
2349 return None
a504ced0 2350
c32b0aab 2351 all_sub_langs = available_subs.keys()
a504ced0 2352 if self.params.get('allsubtitles', False):
c32b0aab 2353 requested_langs = all_sub_langs
2354 elif self.params.get('subtitleslangs', False):
77c4a9ef 2355 # A list is used so that the order of languages will be the same as
2356 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2357 requested_langs = []
2358 for lang_re in self.params.get('subtitleslangs'):
2359 if lang_re == 'all':
2360 requested_langs.extend(all_sub_langs)
c32b0aab 2361 continue
77c4a9ef 2362 discard = lang_re[0] == '-'
c32b0aab 2363 if discard:
77c4a9ef 2364 lang_re = lang_re[1:]
2365 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2366 if discard:
2367 for lang in current_langs:
77c4a9ef 2368 while lang in requested_langs:
2369 requested_langs.remove(lang)
c32b0aab 2370 else:
77c4a9ef 2371 requested_langs.extend(current_langs)
2372 requested_langs = orderedSet(requested_langs)
c32b0aab 2373 elif 'en' in available_subs:
2374 requested_langs = ['en']
a504ced0 2375 else:
c32b0aab 2376 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2377 if requested_langs:
2378 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2379
2380 formats_query = self.params.get('subtitlesformat', 'best')
2381 formats_preference = formats_query.split('/') if formats_query else []
2382 subs = {}
2383 for lang in requested_langs:
2384 formats = available_subs.get(lang)
2385 if formats is None:
2386 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2387 continue
a504ced0
JMF
2388 for ext in formats_preference:
2389 if ext == 'best':
2390 f = formats[-1]
2391 break
2392 matches = list(filter(lambda f: f['ext'] == ext, formats))
2393 if matches:
2394 f = matches[-1]
2395 break
2396 else:
2397 f = formats[-1]
2398 self.report_warning(
2399 'No subtitle format found matching "%s" for language %s, '
2400 'using %s' % (formats_query, lang, f['ext']))
2401 subs[lang] = f
2402 return subs
2403
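# Illustrative examples (assumptions, not from the original source):
#     subtitleslangs = ['all', '-live_chat']  -> every available language except
#                                                those matching the regex 'live_chat'
#     subtitleslangs = ['en.*', 'ja']         -> all English variants plus Japanese
# Each entry is treated as a regex anchored at both ends ('$' is appended and
# re.match anchors the start).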
d06daf23 2404 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2405 def print_mandatory(field, actual_field=None):
2406 if actual_field is None:
2407 actual_field = field
d06daf23 2408 if (self.params.get('force%s' % field, False)
53c18592 2409 and (not incomplete or info_dict.get(actual_field) is not None)):
2410 self.to_stdout(info_dict[actual_field])
d06daf23
S
2411
2412 def print_optional(field):
2413 if (self.params.get('force%s' % field, False)
2414 and info_dict.get(field) is not None):
2415 self.to_stdout(info_dict[field])
2416
53c18592 2417 info_dict = info_dict.copy()
2418 if filename is not None:
2419 info_dict['filename'] = filename
2420 if info_dict.get('requested_formats') is not None:
2421 # For RTMP URLs, also include the playpath
2422 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2423 elif 'url' in info_dict:
2424 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2425
2b8a2973 2426 if self.params.get('forceprint') or self.params.get('forcejson'):
2427 self.post_extract(info_dict)
53c18592 2428 for tmpl in self.params.get('forceprint', []):
2429 if re.match(r'\w+$', tmpl):
2430 tmpl = '%({})s'.format(tmpl)
2431 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
901130bb 2432 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
53c18592 2433
d06daf23
S
2434 print_mandatory('title')
2435 print_mandatory('id')
53c18592 2436 print_mandatory('url', 'urls')
d06daf23
S
2437 print_optional('thumbnail')
2438 print_optional('description')
53c18592 2439 print_optional('filename')
b868936c 2440 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2441 self.to_stdout(formatSeconds(info_dict['duration']))
2442 print_mandatory('format')
53c18592 2443
2b8a2973 2444 if self.params.get('forcejson'):
6e84b215 2445 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2446
e8e73840 2447 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2448 if not info.get('url'):
1151c407 2449 self.raise_no_formats(info, True)
e8e73840 2450
2451 if test:
2452 verbose = self.params.get('verbose')
2453 params = {
2454 'test': True,
2455 'quiet': not verbose,
2456 'verbose': verbose,
2457 'noprogress': not verbose,
2458 'nopart': True,
2459 'skip_unavailable_fragments': False,
2460 'keep_fragments': False,
2461 'overwrites': True,
2462 '_no_ytdl_file': True,
2463 }
2464 else:
2465 params = self.params
96fccc10 2466 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2467 if not test:
2468 for ph in self._progress_hooks:
2469 fd.add_progress_hook(ph)
18e674b4 2470 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2471 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2472 new_info = dict(info)
2473 if new_info.get('http_headers') is None:
2474 new_info['http_headers'] = self._calc_headers(new_info)
2475 return fd.download(name, new_info, subtitle)
2476
8222d8de
JMF
2477 def process_info(self, info_dict):
2478 """Process a single resolved IE result."""
2479
2480 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
2481
2482 max_downloads = self.params.get('max_downloads')
2483 if max_downloads is not None:
2484 if self._num_downloads >= int(max_downloads):
2485 raise MaxDownloadsReached()
8222d8de 2486
d06daf23 2487 # TODO: backward compatibility, to be removed
8222d8de 2488 info_dict['fulltitle'] = info_dict['title']
8222d8de 2489
4513a41a 2490 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2491 info_dict['format'] = info_dict['ext']
2492
c77495e3 2493 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2494 return
2495
277d6ff5 2496 self.post_extract(info_dict)
fd288278 2497 self._num_downloads += 1
8222d8de 2498
dcf64d43 2499 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2500 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2501 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2502 files_to_move = {}
8222d8de
JMF
2503
2504 # Forced printings
4513a41a 2505 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2506
b7b04c78 2507 if self.params.get('simulate'):
2d30509f 2508 if self.params.get('force_write_download_archive', False):
2509 self.record_download_archive(info_dict)
2d30509f 2510 # Do nothing else if in simulate mode
8222d8de
JMF
2511 return
2512
de6000d9 2513 if full_filename is None:
8222d8de 2514 return
e92caff5 2515 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2516 return
e92caff5 2517 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2518 return
2519
80c03fa9 2520 if self._write_description('video', info_dict,
2521 self.prepare_filename(info_dict, 'description')) is None:
2522 return
2523
2524 sub_files = self._write_subtitles(info_dict, temp_filename)
2525 if sub_files is None:
2526 return
2527 files_to_move.update(dict(sub_files))
2528
2529 thumb_files = self._write_thumbnails(
2530 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2531 if thumb_files is None:
2532 return
2533 files_to_move.update(dict(thumb_files))
8222d8de 2534
80c03fa9 2535 infofn = self.prepare_filename(info_dict, 'infojson')
2536 _infojson_written = self._write_info_json('video', info_dict, infofn)
2537 if _infojson_written:
2538 info_dict['__infojson_filename'] = infofn
2539 elif _infojson_written is None:
2540 return
2541
2542 # Note: Annotations are deprecated
2543 annofn = None
1fb07d10 2544 if self.params.get('writeannotations', False):
de6000d9 2545 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2546 if annofn:
e92caff5 2547 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2548 return
0c3d0f51 2549 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2550 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2551 elif not info_dict.get('annotations'):
2552 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2553 else:
2554 try:
6febd1c1 2555 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2556 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2557 annofile.write(info_dict['annotations'])
2558 except (KeyError, TypeError):
6febd1c1 2559 self.report_warning('There are no annotations to write.')
7b6fefc9 2560 except (OSError, IOError):
6febd1c1 2561 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2562 return
1fb07d10 2563
732044af 2564 # Write internet shortcut files
2565 url_link = webloc_link = desktop_link = False
2566 if self.params.get('writelink', False):
2567 if sys.platform == "darwin": # macOS.
2568 webloc_link = True
2569 elif sys.platform.startswith("linux"):
2570 desktop_link = True
2571 else: # if sys.platform in ['win32', 'cygwin']:
2572 url_link = True
2573 if self.params.get('writeurllink', False):
2574 url_link = True
2575 if self.params.get('writewebloclink', False):
2576 webloc_link = True
2577 if self.params.get('writedesktoplink', False):
2578 desktop_link = True
2579
2580 if url_link or webloc_link or desktop_link:
2581 if 'webpage_url' not in info_dict:
2582 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2583 return
2584 ascii_url = iri_to_uri(info_dict['webpage_url'])
2585
2586 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2587 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2588 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2589 self.to_screen('[info] Internet shortcut is already present')
2590 else:
2591 try:
2592 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2593 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2594 template_vars = {'url': ascii_url}
2595 if embed_filename:
2596 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2597 linkfile.write(template % template_vars)
2598 except (OSError, IOError):
2599 self.report_error('Cannot write internet shortcut ' + linkfn)
2600 return False
2601 return True
2602
2603 if url_link:
2604 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2605 return
2606 if webloc_link:
2607 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2608 return
2609 if desktop_link:
2610 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2611 return
2612
56d868db 2613 try:
2614 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2615 except PostProcessingError as err:
2616 self.report_error('Preprocessing: %s' % str(err))
2617 return
2618
732044af 2619 must_record_download_archive = False
56d868db 2620 if self.params.get('skip_download', False):
2621 info_dict['filepath'] = temp_filename
2622 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2623 info_dict['__files_to_move'] = files_to_move
2624 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2625 else:
2626 # Download
b868936c 2627 info_dict.setdefault('__postprocessors', [])
4340deca 2628 try:
0202b52a 2629
6b591b29 2630 def existing_file(*filepaths):
2631 ext = info_dict.get('ext')
2632 final_ext = self.params.get('final_ext', ext)
2633 existing_files = []
2634 for file in orderedSet(filepaths):
2635 if final_ext != ext:
2636 converted = replace_extension(file, final_ext, ext)
2637 if os.path.exists(encodeFilename(converted)):
2638 existing_files.append(converted)
2639 if os.path.exists(encodeFilename(file)):
2640 existing_files.append(file)
2641
2642 if not existing_files or self.params.get('overwrites', False):
2643 for file in orderedSet(existing_files):
2644 self.report_file_delete(file)
2645 os.remove(encodeFilename(file))
2646 return None
2647
6b591b29 2648 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2649 return existing_files[0]
0202b52a 2650
2651 success = True
4340deca 2652 if info_dict.get('requested_formats') is not None:
81cd954a S 2653
2654 def compatible_formats(formats):
d03cfdce 2655 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2656 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2657 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2658 if len(video_formats) > 2 or len(audio_formats) > 2:
2659 return False
2660
81cd954a 2661 # Check extension
d03cfdce 2662 exts = set(format.get('ext') for format in formats)
2663 COMPATIBLE_EXTS = (
2664 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2665 set(('webm',)),
2666 )
2667 for ext_sets in COMPATIBLE_EXTS:
2668 if ext_sets.issuperset(exts):
2669 return True
81cd954a S 2670 # TODO: Check acodec/vcodec
2671 return False
2672
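# Illustrative example (hypothetical formats): an avc1/mp4 video track plus an mp4a/m4a
# audio track both fall inside the mp4-family extension set above, so they can be merged
# without changing container; a vp9/webm video plus an m4a audio track do not, and the
# code below then falls back to mkv unless --merge-output-format says otherwise.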
2673 requested_formats = info_dict['requested_formats']
0202b52a 2674 old_ext = info_dict['ext']
3b297919 2675 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2676 info_dict['ext'] = 'mkv'
2677 self.report_warning(
2678 'Requested formats are incompatible for merge and will be merged into mkv.')
124bc071 2679 new_ext = info_dict['ext']
0202b52a 2680
124bc071 2681 def correct_ext(filename, ext=new_ext):
96fccc10 2682 if filename == '-':
2683 return filename
0202b52a 2684 filename_real_ext = os.path.splitext(filename)[1][1:]
2685 filename_wo_ext = (
2686 os.path.splitext(filename)[0]
124bc071 2687 if filename_real_ext in (old_ext, new_ext)
0202b52a 2688 else filename)
124bc071 2689 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2690
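# Illustrative results (hypothetical names): with old_ext='webm' and new_ext='mkv',
#   correct_ext('video.webm') -> 'video.mkv'
#   correct_ext('video.temp') -> 'video.temp.mkv'   # unknown extension is kept
#   correct_ext('-')          -> '-'                # stdout is passed through untouched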
38c6902b 2691 # Ensure filename always has a correct extension for successful merge
0202b52a 2692 full_filename = correct_ext(full_filename)
2693 temp_filename = correct_ext(temp_filename)
2694 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2695 info_dict['__real_download'] = False
18e674b4 2696
2697 _protocols = set(determine_protocol(f) for f in requested_formats)
dbf5416a 2698 if len(_protocols) == 1: # All requested formats have same protocol
18e674b4 2699 info_dict['protocol'] = _protocols.pop()
d5fe04f5 2700 directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
dbf5416a 2701 if dl_filename is not None:
6c7274ec 2702 self.report_file_already_downloaded(dl_filename)
96fccc10 2703 elif (directly_mergable and get_suitable_downloader(
a46a815b 2704 info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
dbf5416a 2705 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2706 success, real_download = self.dl(temp_filename, info_dict)
2707 info_dict['__real_download'] = real_download
18e674b4 2708 else:
2709 downloaded = []
2710 merger = FFmpegMergerPP(self)
2711 if self.params.get('allow_unplayable_formats'):
2712 self.report_warning(
2713 'You have requested merging of multiple formats '
2714 'while also allowing unplayable formats to be downloaded. '
2715 'The formats won\'t be merged to prevent data corruption.')
2716 elif not merger.available:
2717 self.report_warning(
2718 'You have requested merging of multiple formats but ffmpeg is not installed. '
2719 'The formats won\'t be merged.')
2720
96fccc10 2721 if temp_filename == '-':
2722 reason = ('using a downloader other than ffmpeg' if directly_mergable
2723 else 'but the formats are incompatible for simultaneous download' if merger.available
2724 else 'but ffmpeg is not installed')
2725 self.report_warning(
2726 f'You have requested downloading multiple formats to stdout {reason}. '
2727 'The formats will be streamed one after the other')
2728 fname = temp_filename
dbf5416a 2729 for f in requested_formats:
2730 new_info = dict(info_dict)
2731 del new_info['requested_formats']
2732 new_info.update(f)
96fccc10 2733 if temp_filename != '-':
124bc071 2734 fname = prepend_extension(
2735 correct_ext(temp_filename, new_info['ext']),
2736 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2737 if not self._ensure_dir_exists(fname):
2738 return
a21e0ab1 2739 f['filepath'] = fname
96fccc10 2740 downloaded.append(fname)
dbf5416a 2741 partial_success, real_download = self.dl(fname, new_info)
2742 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2743 success = success and partial_success
2744 if merger.available and not self.params.get('allow_unplayable_formats'):
2745 info_dict['__postprocessors'].append(merger)
2746 info_dict['__files_to_merge'] = downloaded
2747 # Even if nothing was downloaded, the merge itself only happens now, so count it as a real download
2748 info_dict['__real_download'] = True
2749 else:
2750 for file in downloaded:
2751 files_to_move[file] = None
4340deca P 2752 else:
2753 # Just a single file
0202b52a 2754 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2755 if dl_filename is None or dl_filename == temp_filename:
2756 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2757 # So we should try to resume the download
e8e73840 2758 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2759 info_dict['__real_download'] = real_download
6c7274ec 2760 else:
2761 self.report_file_already_downloaded(dl_filename)
0202b52a 2762
0202b52a 2763 dl_filename = dl_filename or temp_filename
c571435f 2764 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2765
3158150c 2766 except network_exceptions as err:
7960b056 2767 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca P 2768 return
2769 except (OSError, IOError) as err:
2770 raise UnavailableVideoError(err)
2771 except (ContentTooShortError, ) as err:
2772 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2773 return
8222d8de 2774
de6000d9 2775 if success and full_filename != '-':
f17f8651 2776
fd7cfb64 2777 def fixup():
2778 do_fixup = True
2779 fixup_policy = self.params.get('fixup')
2780 vid = info_dict['id']
2781
2782 if fixup_policy in ('ignore', 'never'):
2783 return
2784 elif fixup_policy == 'warn':
2785 do_fixup = False
f89b3e2d 2786 elif fixup_policy != 'force':
2787 assert fixup_policy in ('detect_or_warn', None)
2788 if not info_dict.get('__real_download'):
2789 do_fixup = False
fd7cfb64 2790
2791 def ffmpeg_fixup(cndn, msg, cls):
2792 if not cndn:
2793 return
2794 if not do_fixup:
2795 self.report_warning(f'{vid}: {msg}')
2796 return
2797 pp = cls(self)
2798 if pp.available:
2799 info_dict['__postprocessors'].append(pp)
2800 else:
2801 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2802
2803 stretched_ratio = info_dict.get('stretched_ratio')
2804 ffmpeg_fixup(
2805 stretched_ratio not in (1, None),
2806 f'Non-uniform pixel ratio {stretched_ratio}',
2807 FFmpegFixupStretchedPP)
2808
2809 ffmpeg_fixup(
2810 (info_dict.get('requested_formats') is None
2811 and info_dict.get('container') == 'm4a_dash'
2812 and info_dict.get('ext') == 'm4a'),
2813 'writing DASH m4a. Only some players support this container',
2814 FFmpegFixupM4aPP)
2815
2816 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2817 if 'protocol' in info_dict else None)
2818 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2819 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2820 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2821
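# The fixup() helper above only queues ffmpeg-based postprocessors; the policy is
# 'never'/'ignore' (skip checks), 'warn' (report only), 'detect_or_warn' (fix files this
# run actually downloaded) or 'force' (always fix). For example, with --fixup warn a
# non-uniform pixel ratio is merely reported instead of being handed to FFmpegFixupStretchedPP.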
2822 fixup()
8222d8de 2823 try:
23c1a667 2824 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2825 except PostProcessingError as err:
2826 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2827 return
ab8e5e51 AM 2828 try:
2829 for ph in self._post_hooks:
23c1a667 2830 ph(info_dict['filepath'])
ab8e5e51 AM 2831 except Exception as err:
2832 self.report_error('post hooks: %s' % str(err))
2833 return
2d30509f 2834 must_record_download_archive = True
2835
2836 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2837 self.record_download_archive(info_dict)
c3e6ffba 2838 max_downloads = self.params.get('max_downloads')
2839 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2840 raise MaxDownloadsReached()
8222d8de JMF 2841
2842 def download(self, url_list):
2843 """Download a given list of URLs."""
de6000d9 2844 outtmpl = self.outtmpl_dict['default']
3089bc74 S 2845 if (len(url_list) > 1
2846 and outtmpl != '-'
2847 and '%' not in outtmpl
2848 and self.params.get('max_downloads') != 1):
acd69589 2849 raise SameFileError(outtmpl)
8222d8de JMF 2850
2851 for url in url_list:
2852 try:
5f6a1245 2853 # It also downloads the videos
61aa5ba3 S 2854 res = self.extract_info(
2855 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2856 except UnavailableVideoError:
6febd1c1 2857 self.report_error('unable to download video')
8222d8de 2858 except MaxDownloadsReached:
8f18aca8 2859 self.to_screen('[info] Maximum number of downloads reached')
8b0d7497 2860 raise
2861 except ExistingVideoReached:
8f18aca8 2862 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2863 raise
2864 except RejectedVideoReached:
8f18aca8 2865 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
8222d8de 2866 raise
63e0be34 PH 2867 else:
2868 if self.params.get('dump_single_json', False):
277d6ff5 2869 self.post_extract(res)
6e84b215 2870 self.to_stdout(json.dumps(self.sanitize_info(res)))
8222d8de JMF 2871
2872 return self._download_retcode
2873
1dcc4c0c 2874 def download_with_info_file(self, info_filename):
31bd3925 JMF 2875 with contextlib.closing(fileinput.FileInput(
2876 [info_filename], mode='r',
2877 openhook=fileinput.hook_encoded('utf-8'))) as f:
2878 # FileInput doesn't have a read method, so we can't call json.load
8012d892 2879 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 JMF 2880 try:
2881 self.process_ie_result(info, download=True)
d3f62c19 2882 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898 JMF 2883 webpage_url = info.get('webpage_url')
2884 if webpage_url is not None:
6febd1c1 2885 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898 JMF 2886 return self.download([webpage_url])
2887 else:
2888 raise
2889 return self._download_retcode
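# Typical use of the method above (hypothetical path): re-running a previously dumped
# info JSON, e.g. YoutubeDL(params).download_with_info_file('clip.info.json'), which
# falls back to the recorded 'webpage_url' if downloading from the stored info fails.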
1dcc4c0c 2890
cb202fd2 2891 @staticmethod
8012d892 2892 def sanitize_info(info_dict, remove_private_keys=False):
2893 ''' Sanitize the infodict for converting to json '''
3ad56b42 2894 if info_dict is None:
2895 return info_dict
6e84b215 2896 info_dict.setdefault('epoch', int(time.time()))
2897 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 2898 keep_keys = ['_type']  # Always keep this to facilitate load-info-json
8012d892 2899 if remove_private_keys:
6e84b215 2900 remove_keys |= {
2901 'requested_formats', 'requested_subtitles', 'requested_entries',
2902 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2903 }
ae8f99e6 2904 empty_values = (None, {}, [], set(), tuple())
2905 reject = lambda k, v: k not in keep_keys and (
2906 k.startswith('_') or k in remove_keys or v in empty_values)
2907 else:
ae8f99e6 2908 reject = lambda k, v: k in remove_keys
5226731e 2909 filter_fn = lambda obj: (
b0249bca 2910 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2911 else obj if not isinstance(obj, dict)
ae8f99e6 2912 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2913 return filter_fn(info_dict)
cb202fd2 2914
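# Illustrative call (hypothetical dict): with remove_private_keys=True,
#   YoutubeDL.sanitize_info({'id': 'x', '_type': 'video', '__workdir': '/tmp',
#                            'filepath': 'x.mp4', 'formats': []}, True)
# keeps 'id' and '_type' but drops '__workdir' (leading underscore), 'filepath'
# (explicitly listed) and 'formats' (empty value), and stamps an 'epoch' field.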
8012d892 2915 @staticmethod
2916 def filter_requested_info(info_dict, actually_filter=True):
2917 ''' Alias of sanitize_info for backward compatibility '''
2918 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2919
dcf64d43 2920 def run_pp(self, pp, infodict):
5bfa4862 2921 files_to_delete = []
dcf64d43 2922 if '__files_to_move' not in infodict:
2923 infodict['__files_to_move'] = {}
b1940459 2924 try:
2925 files_to_delete, infodict = pp.run(infodict)
2926 except PostProcessingError as e:
2927 # Must be True and not 'only_download'
2928 if self.params.get('ignoreerrors') is True:
2929 self.report_error(e)
2930 return infodict
2931 raise
2932
5bfa4862 2933 if not files_to_delete:
dcf64d43 2934 return infodict
5bfa4862 2935 if self.params.get('keepvideo', False):
2936 for f in files_to_delete:
dcf64d43 2937 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2938 else:
2939 for old_filename in set(files_to_delete):
2940 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2941 try:
2942 os.remove(encodeFilename(old_filename))
2943 except (IOError, OSError):
2944 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2945 if old_filename in infodict['__files_to_move']:
2946 del infodict['__files_to_move'][old_filename]
2947 return infodict
5bfa4862 2948
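# Postprocessors follow the small contract run_pp relies on: pp.run(info) returns a
# (files_to_delete, info) pair. A minimal plugin-style sketch (hypothetical class,
# assuming the PostProcessor base from yt_dlp.postprocessor):
#   class MyNotifyPP(PostProcessor):
#       def run(self, info):
#           self.to_screen('Finished %s' % info.get('id'))
#           return [], info  # nothing to delete, info unchanged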
277d6ff5 2949 @staticmethod
2950 def post_extract(info_dict):
2951 def actual_post_extract(info_dict):
2952 if info_dict.get('_type') in ('playlist', 'multi_video'):
2953 for video_dict in info_dict.get('entries', {}):
b050d210 2954 actual_post_extract(video_dict or {})
277d6ff5 2955 return
2956
07cce701 2957 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2958 extra = post_extractor().items()
2959 info_dict.update(extra)
07cce701 2960 info_dict.pop('__post_extractor', None)
277d6ff5 2961
4ec82a72 2962 original_infodict = info_dict.get('__original_infodict') or {}
2963 original_infodict.update(extra)
2964 original_infodict.pop('__post_extractor', None)
2965
b050d210 2966 actual_post_extract(info_dict or {})
277d6ff5 2967
56d868db 2968 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 2969 info = dict(ie_info)
56d868db 2970 info['__files_to_move'] = files_to_move or {}
2971 for pp in self._pps[key]:
dcf64d43 2972 info = self.run_pp(pp, info)
56d868db 2973 return info, info.pop('__files_to_move', None)
5bfa4862 2974
dcf64d43 2975 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de JMF 2976 """Run all the postprocessors on the given file."""
2977 info = dict(ie_info)
2978 info['filepath'] = filename
dcf64d43 2979 info['__files_to_move'] = files_to_move or {}
0202b52a 2980
56d868db 2981 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 2982 info = self.run_pp(pp, info)
2983 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2984 del info['__files_to_move']
56d868db 2985 for pp in self._pps['after_move']:
dcf64d43 2986 info = self.run_pp(pp, info)
23c1a667 2987 return info
c1c9a79c 2988
5db07df6 2989 def _make_archive_id(self, info_dict):
e9fef7ee S 2990 video_id = info_dict.get('id')
2991 if not video_id:
2992 return
5db07df6 PH 2993 # Future-proof against any change in case
2994 # and backwards compatibility with prior versions
e9fef7ee 2995 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2996 if extractor is None:
1211bb6d S 2997 url = str_or_none(info_dict.get('url'))
2998 if not url:
2999 return
e9fef7ee 3000 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3001 for ie_key, ie in self._ies.items():
1211bb6d 3002 if ie.suitable(url):
8b7491c8 3003 extractor = ie_key
e9fef7ee S 3004 break
3005 else:
3006 return
d0757229 3007 return '%s %s' % (extractor.lower(), video_id)
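# Archive entries therefore look like '<extractor key, lowercased> <video id>',
# e.g. (hypothetical) 'youtube dQw4w9WgXcQ'; one entry per line in the
# --download-archive file written by record_download_archive() below.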
5db07df6 PH 3008
3009 def in_download_archive(self, info_dict):
3010 fn = self.params.get('download_archive')
3011 if fn is None:
3012 return False
3013
3014 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3015 if not vid_id:
7012b23c 3016 return False # Incomplete video information
5db07df6 3017
a45e8619 3018 return vid_id in self.archive
c1c9a79c PH 3019
3020 def record_download_archive(self, info_dict):
3021 fn = self.params.get('download_archive')
3022 if fn is None:
3023 return
5db07df6 PH 3024 vid_id = self._make_archive_id(info_dict)
3025 assert vid_id
c1c9a79c 3026 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3027 archive_file.write(vid_id + '\n')
a45e8619 3028 self.archive.add(vid_id)
dd82ffea 3029
8c51aa65 3030 @staticmethod
8abeeb94 3031 def format_resolution(format, default='unknown'):
fb04e403 3032 if format.get('vcodec') == 'none':
8326b00a 3033 if format.get('acodec') == 'none':
3034 return 'images'
fb04e403 3035 return 'audio only'
f49d89ee PH 3036 if format.get('resolution') is not None:
3037 return format['resolution']
35615307 DA 3038 if format.get('width') and format.get('height'):
3039 res = '%dx%d' % (format['width'], format['height'])
3040 elif format.get('height'):
3041 res = '%sp' % format['height']
3042 elif format.get('width'):
388ae76b 3043 res = '%dx?' % format['width']
8c51aa65 3044 else:
8abeeb94 3045 res = default
8c51aa65 JMF 3046 return res
3047
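# Illustrative outputs of format_resolution() for hypothetical format dicts:
#   {'width': 1920, 'height': 1080}      -> '1920x1080'
#   {'height': 720}                      -> '720p'
#   {'vcodec': 'none'}                   -> 'audio only'
#   {'vcodec': 'none', 'acodec': 'none'} -> 'images'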
c57f7757 PH 3048 def _format_note(self, fdict):
3049 res = ''
3050 if fdict.get('ext') in ['f4f', 'f4m']:
3051 res += '(unsupported) '
32f90364 PH 3052 if fdict.get('language'):
3053 if res:
3054 res += ' '
9016d76f 3055 res += '[%s] ' % fdict['language']
c57f7757 PH 3056 if fdict.get('format_note') is not None:
3057 res += fdict['format_note'] + ' '
3058 if fdict.get('tbr') is not None:
3059 res += '%4dk ' % fdict['tbr']
3060 if fdict.get('container') is not None:
3061 if res:
3062 res += ', '
3063 res += '%s container' % fdict['container']
3089bc74 S 3064 if (fdict.get('vcodec') is not None
3065 and fdict.get('vcodec') != 'none'):
c57f7757 PH 3066 if res:
3067 res += ', '
3068 res += fdict['vcodec']
91c7271a 3069 if fdict.get('vbr') is not None:
c57f7757 PH 3070 res += '@'
3071 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3072 res += 'video@'
3073 if fdict.get('vbr') is not None:
3074 res += '%4dk' % fdict['vbr']
fbb21cf5 3075 if fdict.get('fps') is not None:
5d583bdf S 3076 if res:
3077 res += ', '
3078 res += '%sfps' % fdict['fps']
c57f7757 PH 3079 if fdict.get('acodec') is not None:
3080 if res:
3081 res += ', '
3082 if fdict['acodec'] == 'none':
3083 res += 'video only'
3084 else:
3085 res += '%-5s' % fdict['acodec']
3086 elif fdict.get('abr') is not None:
3087 if res:
3088 res += ', '
3089 res += 'audio'
3090 if fdict.get('abr') is not None:
3091 res += '@%3dk' % fdict['abr']
3092 if fdict.get('asr') is not None:
3093 res += ' (%5dHz)' % fdict['asr']
3094 if fdict.get('filesize') is not None:
3095 if res:
3096 res += ', '
3097 res += format_bytes(fdict['filesize'])
9732d77e PH 3098 elif fdict.get('filesize_approx') is not None:
3099 if res:
3100 res += ', '
3101 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3102 return res
91c7271a 3103
c57f7757 3104 def list_formats(self, info_dict):
94badb25 3105 formats = info_dict.get('formats', [info_dict])
53ed7066 3106 new_format = (
3107 'list-formats' not in self.params.get('compat_opts', [])
169dbde9 3108 and self.params.get('listformats_table', True) is not False)
76d321f6 3109 if new_format:
3110 table = [
3111 [
3112 format_field(f, 'format_id'),
3113 format_field(f, 'ext'),
3114 self.format_resolution(f),
3115 format_field(f, 'fps', '%d'),
3116 '|',
3117 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3118 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3119 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
76d321f6 3120 '|',
3121 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3122 format_field(f, 'vbr', '%4dk'),
3123 format_field(f, 'acodec', default='unknown').replace('none', ''),
3124 format_field(f, 'abr', '%3dk'),
3125 format_field(f, 'asr', '%5dHz'),
3f698246 3126 ', '.join(filter(None, (
3127 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3128 format_field(f, 'language', '[%s]'),
3129 format_field(f, 'format_note'),
3130 format_field(f, 'container', ignore=(None, f.get('ext'))),
ea05b302 3131 ))),
3f698246 3132 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
76d321f6 3133 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3f698246 3134 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
76d321f6 3135 else:
3136 table = [
3137 [
3138 format_field(f, 'format_id'),
3139 format_field(f, 'ext'),
3140 self.format_resolution(f),
3141 self._format_note(f)]
3142 for f in formats
3143 if f.get('preference') is None or f['preference'] >= -1000]
3144 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3145
cfb56d1a 3146 self.to_screen(
169dbde9 3147 '[info] Available formats for %s:' % info_dict['id'])
3148 self.to_stdout(render_table(
bc97cdae 3149 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a PH 3150
3151 def list_thumbnails(self, info_dict):
b0249bca 3152 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3153 if not thumbnails:
b7b72db9 3154 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3155 return
cfb56d1a PH 3156
3157 self.to_screen(
3158 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3159 self.to_stdout(render_table(
cfb56d1a PH 3160 ['ID', 'width', 'height', 'URL'],
3161 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3162
360e1ca5 3163 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3164 if not subtitles:
360e1ca5 3165 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3166 return
a504ced0 3167 self.to_screen(
edab9dbf 3168 'Available %s for %s:' % (name, video_id))
2412044c 3169
3170 def _row(lang, formats):
49c258e1 3171 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3172 if len(set(names)) == 1:
7aee40c1 3173 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3174 return [lang, ', '.join(names), ', '.join(exts)]
3175
169dbde9 3176 self.to_stdout(render_table(
2412044c 3177 ['Language', 'Name', 'Formats'],
3178 [_row(lang, formats) for lang, formats in subtitles.items()],
3179 hideEmpty=True))
a504ced0 3180
dca08720 PH 3181 def urlopen(self, req):
3182 """ Start an HTTP download """
82d8a8b6 3183 if isinstance(req, compat_basestring):
67dda517 3184 req = sanitized_Request(req)
19a41fc6 3185 return self._opener.open(req, timeout=self._socket_timeout)
dca08720 PH 3186
3187 def print_debug_header(self):
3188 if not self.params.get('verbose'):
3189 return
62fec3b2 3190
c6afed48 PH 3191 stdout_encoding = getattr(
3192 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3193 encoding_str = (
734f90bb PH 3194 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3195 locale.getpreferredencoding(),
3196 sys.getfilesystemencoding(),
c6afed48 3197 stdout_encoding,
b0472057 3198 self.get_encoding()))
4192b51c 3199 write_string(encoding_str, encoding=None)
734f90bb 3200
4c88ff87 3201 source = detect_variant()
3202 self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
e0986e31 3203 if _LAZY_LOADER:
f74980cb 3204 self._write_string('[debug] Lazy loading extractors enabled\n')
3ae5e797 3205 if plugin_extractors or plugin_postprocessors:
3206 self._write_string('[debug] Plugins: %s\n' % [
3207 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3208 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3209 if self.params.get('compat_opts'):
3210 self._write_string(
3211 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720 PH 3212 try:
3213 sp = subprocess.Popen(
3214 ['git', 'rev-parse', '--short', 'HEAD'],
3215 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3216 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3217 out, err = process_communicate_or_kill(sp)
dca08720 PH 3218 out = out.decode().strip()
3219 if re.match('[0-9a-f]+', out):
f74980cb 3220 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3221 except Exception:
dca08720 PH 3222 try:
3223 sys.exc_clear()
70a1165b 3224 except Exception:
dca08720 3225 pass
b300cda4 S 3226
3227 def python_implementation():
3228 impl_name = platform.python_implementation()
3229 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3230 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3231 return impl_name
3232
e5813e53 3233 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3234 platform.python_version(),
3235 python_implementation(),
3236 platform.architecture()[0],
b300cda4 3237 platform_name()))
d28b5171 3238
73fac4e9 3239 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3240 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3241 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3242 exe_str = ', '.join(
2831b468 3243 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3244 ) or 'none'
d28b5171 3245 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720 3246
2831b468 3247 from .downloader.websocket import has_websockets
3248 from .postprocessor.embedthumbnail import has_mutagen
3249 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3250
ad3dc496 3251 lib_str = ', '.join(sorted(filter(None, (
edf65256 3252 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
2831b468 3253 has_websockets and 'websockets',
3254 has_mutagen and 'mutagen',
3255 SQLITE_AVAILABLE and 'sqlite',
3256 KEYRING_AVAILABLE and 'keyring',
ad3dc496 3257 )))) or 'none'
2831b468 3258 self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3259
dca08720 PH 3260 proxy_map = {}
3261 for handler in self._opener.handlers:
3262 if hasattr(handler, 'proxies'):
3263 proxy_map.update(handler.proxies)
734f90bb 3264 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3265
58b1f00d PH 3266 if self.params.get('call_home', False):
3267 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3268 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3269 return
58b1f00d PH 3270 latest_version = self.urlopen(
3271 'https://yt-dl.org/latest/version').read().decode('utf-8')
3272 if version_tuple(latest_version) > version_tuple(__version__):
3273 self.report_warning(
3274 'You are using an outdated version (newest version: %s)! '
3275 'See https://yt-dl.org/update if you need help updating.' %
3276 latest_version)
3277
e344693b 3278 def _setup_opener(self):
6ad14cab 3279 timeout_val = self.params.get('socket_timeout')
19a41fc6 3280 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3281
982ee69a 3282 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720 PH 3283 opts_cookiefile = self.params.get('cookiefile')
3284 opts_proxy = self.params.get('proxy')
3285
982ee69a 3286 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3287
6a3f4c3f 3288 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720 PH 3289 if opts_proxy is not None:
3290 if opts_proxy == '':
3291 proxies = {}
3292 else:
3293 proxies = {'http': opts_proxy, 'https': opts_proxy}
3294 else:
3295 proxies = compat_urllib_request.getproxies()
067aa17e 3296 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720 PH 3297 if 'http' in proxies and 'https' not in proxies:
3298 proxies['https'] = proxies['http']
91410c9b 3299 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2 PH 3300
3301 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d PH 3302 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3303 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3304 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3305 data_handler = compat_urllib_request_DataHandler()
6240b0a2 JMF 3306
3307 # When passing our own FileHandler instance, build_opener won't add the
3308 # default FileHandler and allows us to disable the file protocol, which
3309 # can be used for malicious purposes (see
067aa17e 3310 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2 JMF 3311 file_handler = compat_urllib_request.FileHandler()
3312
3313 def file_open(*args, **kwargs):
7a5c1cfe 3314 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2 JMF 3315 file_handler.file_open = file_open
3316
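# With the override above, any file:// request going through this opener is expected to
# fail fast, e.g. (hypothetical) ydl.urlopen('file:///etc/passwd') raises
# compat_urllib_error.URLError instead of reading the local file.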
3317 opener = compat_urllib_request.build_opener(
fca6dba8 3318 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3319
dca08720 PH 3320 # Delete the default user-agent header, which would otherwise apply in
3321 # cases where our custom HTTP handler doesn't come into play
067aa17e 3322 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720 PH 3323 opener.addheaders = []
3324 self._opener = opener
62fec3b2 PH 3325
3326 def encode(self, s):
3327 if isinstance(s, bytes):
3328 return s # Already encoded
3329
3330 try:
3331 return s.encode(self.get_encoding())
3332 except UnicodeEncodeError as err:
3333 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3334 raise
3335
3336 def get_encoding(self):
3337 encoding = self.params.get('encoding')
3338 if encoding is None:
3339 encoding = preferredencoding()
3340 return encoding
ec82d85a 3341
80c03fa9 3342 def _write_info_json(self, label, ie_result, infofn):
3343 ''' Write infojson and return True = written, False = skipped, None = error '''
3344 if not self.params.get('writeinfojson'):
3345 return False
3346 elif not infofn:
3347 self.write_debug(f'Skipping writing {label} infojson')
3348 return False
3349 elif not self._ensure_dir_exists(infofn):
3350 return None
3351 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3352 self.to_screen(f'[info] {label.title()} metadata is already present')
3353 else:
3354 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3355 try:
3356 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3357 except (OSError, IOError):
3358 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3359 return None
3360 return True
3361
3362 def _write_description(self, label, ie_result, descfn):
3363 ''' Write description and return True = written, False = skipped, None = error '''
3364 if not self.params.get('writedescription'):
3365 return False
3366 elif not descfn:
3367 self.write_debug(f'Skipping writing {label} description')
3368 return False
3369 elif not self._ensure_dir_exists(descfn):
3370 return None
3371 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3372 self.to_screen(f'[info] {label.title()} description is already present')
3373 elif ie_result.get('description') is None:
3374 self.report_warning(f'There\'s no {label} description to write')
3375 return False
3376 else:
3377 try:
3378 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3379 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3380 descfile.write(ie_result['description'])
3381 except (OSError, IOError):
3382 self.report_error(f'Cannot write {label} description file {descfn}')
3383 return None
3384 return True
3385
3386 def _write_subtitles(self, info_dict, filename):
3387 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3388 ret = []
3389 subtitles = info_dict.get('requested_subtitles')
3390 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3391 # Subtitle download errors are already handled as non-fatal by the relevant IE,
3392 # so extraction silently continues when the IE does not support subtitles
3393 return ret
3394
3395 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3396 if not sub_filename_base:
3397 self.to_screen('[info] Skipping writing video subtitles')
3398 return ret
3399 for sub_lang, sub_info in subtitles.items():
3400 sub_format = sub_info['ext']
3401 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3402 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3403 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3404 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3405 sub_info['filepath'] = sub_filename
3406 ret.append((sub_filename, sub_filename_final))
3407 continue
3408
3409 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3410 if sub_info.get('data') is not None:
3411 try:
3412 # Use newline='' to prevent conversion of newline characters
3413 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3414 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3415 subfile.write(sub_info['data'])
3416 sub_info['filepath'] = sub_filename
3417 ret.append((sub_filename, sub_filename_final))
3418 continue
3419 except (OSError, IOError):
3420 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3421 return None
3422
3423 try:
3424 sub_copy = sub_info.copy()
3425 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3426 self.dl(sub_filename, sub_copy, subtitle=True)
3427 sub_info['filepath'] = sub_filename
3428 ret.append((sub_filename, sub_filename_final))
3429 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3430 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3431 continue
3432 return ret
3433
3434 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3435 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3436 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3437 thumbnails, ret = [], []
6c4fd172 3438 if write_all or self.params.get('writethumbnail', False):
0202b52a 3439 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3440 multiple = write_all and len(thumbnails) > 1
ec82d85a 3441
80c03fa9 3442 if thumb_filename_base is None:
3443 thumb_filename_base = filename
3444 if thumbnails and not thumb_filename_base:
3445 self.write_debug(f'Skipping writing {label} thumbnail')
3446 return ret
3447
981052c9 3448 for t in thumbnails[::-1]:
80c03fa9 3449 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3450 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3451 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3452 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3453
80c03fa9 3454 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3455 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3456 t['filepath'] = thumb_filename
80c03fa9 3457 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
ec82d85a 3458 else:
80c03fa9 3459 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a PH 3460 try:
3461 uf = self.urlopen(t['url'])
80c03fa9 3462 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3463 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3464 shutil.copyfileobj(uf, thumbf)
80c03fa9 3465 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3466 t['filepath'] = thumb_filename
3158150c 3467 except network_exceptions as err:
80c03fa9 3468 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3469 if ret and not write_all:
3470 break
0202b52a 3471 return ret