]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[embedsubtitle] Fix error when duration is unknown
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
524e2e4f 28import unicodedata
8222d8de 29
961ea474
S
30from string import ascii_letters
31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
edf65256 38 compat_pycrypto_AES,
7d1eb38a 39 compat_shlex_quote,
ce02ed60 40 compat_str,
67134eab 41 compat_tokenize_tokenize,
ce02ed60
PH
42 compat_urllib_error,
43 compat_urllib_request,
8b172c2e 44 compat_urllib_request_DataHandler,
8c25f81b 45)
982ee69a 46from .cookies import load_cookies
8c25f81b 47from .utils import (
eedb7ba5
S
48 age_restricted,
49 args_to_str,
ce02ed60
PH
50 ContentTooShortError,
51 date_from_str,
52 DateRange,
acd69589 53 DEFAULT_OUTTMPL,
ce02ed60 54 determine_ext,
b5559424 55 determine_protocol,
732044af 56 DOT_DESKTOP_LINK_TEMPLATE,
57 DOT_URL_LINK_TEMPLATE,
58 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 59 DownloadError,
c0384f22 60 encode_compat_str,
ce02ed60 61 encodeFilename,
498f5606 62 EntryNotInPlaylist,
a06916d9 63 error_to_compat_str,
8b0d7497 64 ExistingVideoReached,
590bc6f6 65 expand_path,
ce02ed60 66 ExtractorError,
e29663c6 67 float_or_none,
02dbf93f 68 format_bytes,
76d321f6 69 format_field,
901130bb 70 STR_FORMAT_RE_TMPL,
71 STR_FORMAT_TYPES,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
b0249bca 74 HEADRequest,
c9969434 75 int_or_none,
732044af 76 iri_to_uri,
773f291d 77 ISO3166Utils,
56a8fb4f 78 LazyList,
ce02ed60 79 locked_file,
0202b52a 80 make_dir,
dca08720 81 make_HTTPS_handler,
ce02ed60 82 MaxDownloadsReached,
3158150c 83 network_exceptions,
cd6fc19e 84 orderedSet,
a06916d9 85 OUTTMPL_TYPES,
b7ab0590 86 PagedList,
083c9df9 87 parse_filesize,
91410c9b 88 PerRequestProxyHandler,
dca08720 89 platform_name,
eedb7ba5 90 PostProcessingError,
ce02ed60 91 preferredencoding,
eedb7ba5 92 prepend_extension,
a06916d9 93 process_communicate_or_kill,
51fb4995 94 register_socks_protocols,
a06916d9 95 RejectedVideoReached,
cfb56d1a 96 render_table,
eedb7ba5 97 replace_extension,
ce02ed60
PH
98 SameFileError,
99 sanitize_filename,
1bb5c511 100 sanitize_path,
dcf77cf1 101 sanitize_url,
67dda517 102 sanitized_Request,
e5660ee6 103 std_headers,
1211bb6d 104 str_or_none,
e29663c6 105 strftime_or_none,
ce02ed60 106 subtitles_filename,
51d9739f 107 ThrottledDownload,
732044af 108 to_high_limit_path,
324ad820 109 traverse_obj,
6033d980 110 try_get,
ce02ed60 111 UnavailableVideoError,
29eb5174 112 url_basename,
7d1eb38a 113 variadic,
58b1f00d 114 version_tuple,
ce02ed60
PH
115 write_json_file,
116 write_string,
6a3f4c3f 117 YoutubeDLCookieProcessor,
dca08720 118 YoutubeDLHandler,
fca6dba8 119 YoutubeDLRedirectHandler,
ce02ed60 120)
a0e07d31 121from .cache import Cache
52a8a1e1 122from .extractor import (
123 gen_extractor_classes,
124 get_info_extractor,
125 _LAZY_LOADER,
126 _PLUGIN_CLASSES
127)
4c54b89e 128from .extractor.openload import PhantomJSwrapper
52a8a1e1 129from .downloader import (
dbf5416a 130 FFmpegFD,
52a8a1e1 131 get_suitable_downloader,
132 shorten_protocol_name
133)
4c83c967 134from .downloader.rtmp import rtmpdump_version
4f026faf 135from .postprocessor import (
e36d50c5 136 get_postprocessor,
137 FFmpegFixupDurationPP,
f17f8651 138 FFmpegFixupM3u8PP,
62cd676c 139 FFmpegFixupM4aPP,
6271f1ca 140 FFmpegFixupStretchedPP,
e36d50c5 141 FFmpegFixupTimestampPP,
4f026faf
PH
142 FFmpegMergerPP,
143 FFmpegPostProcessor,
0202b52a 144 MoveFilesAfterDownloadPP,
4f026faf 145)
4c88ff87 146from .update import detect_variant
dca08720 147from .version import __version__
8222d8de 148
e9c0cdd3
YCH
149if compat_os_name == 'nt':
150 import ctypes
151
2459b6e1 152
8222d8de
JMF
153class YoutubeDL(object):
154 """YoutubeDL class.
155
156 YoutubeDL objects are the ones responsible for downloading the
157 actual video file and writing it to disk if the user has requested
158 it, among some other tasks. In most cases there should be one per
159 program. As, given a video URL, the downloader doesn't know how to
160 extract all the needed information, task that InfoExtractors do, it
161 has to pass the URL to one of them.
162
163 For this, YoutubeDL objects have a method that allows
164 InfoExtractors to be registered in a given order. When it is passed
165 a URL, the YoutubeDL object handles it to the first InfoExtractor it
166 finds that reports being able to handle it. The InfoExtractor extracts
167 all the information about the video or videos the URL refers to, and
168 YoutubeDL process the extracted information, possibly using a File
169 Downloader to download the video.
170
171 YoutubeDL objects accept a lot of parameters. In order not to saturate
172 the object constructor with arguments, it receives a dictionary of
173 options instead. These options are available through the params
174 attribute for the InfoExtractors to use. The YoutubeDL also
175 registers itself as the downloader in charge for the InfoExtractors
176 that are added to it, so this is a "mutual registration".
177
178 Available options:
179
180 username: Username for authentication purposes.
181 password: Password for authentication purposes.
180940e0 182 videopassword: Password for accessing a video.
1da50aa3
S
183 ap_mso: Adobe Pass multiple-system operator identifier.
184 ap_username: Multiple-system operator account username.
185 ap_password: Multiple-system operator account password.
8222d8de
JMF
186 usenetrc: Use netrc for authentication instead.
187 verbose: Print additional info to stdout.
188 quiet: Do not print messages to stdout.
ad8915b7 189 no_warnings: Do not print out anything for warnings.
53c18592 190 forceprint: A list of templates to force print
191 forceurl: Force printing final URL. (Deprecated)
192 forcetitle: Force printing title. (Deprecated)
193 forceid: Force printing ID. (Deprecated)
194 forcethumbnail: Force printing thumbnail URL. (Deprecated)
195 forcedescription: Force printing description. (Deprecated)
196 forcefilename: Force printing final filename. (Deprecated)
197 forceduration: Force printing duration. (Deprecated)
8694c600 198 forcejson: Force printing info_dict as JSON.
63e0be34
PH
199 dump_single_json: Force printing the info_dict of the whole playlist
200 (or video) as a single JSON line.
c25228e5 201 force_write_download_archive: Force writing download archive regardless
202 of 'skip_download' or 'simulate'.
b7b04c78 203 simulate: Do not download the video files. If unset (or None),
204 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 205 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 206 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 207 ignore_no_formats_error: Ignore "No video formats" error. Useful for
208 extracting metadata even if the video is not actually
209 available for download (experimental)
c25228e5 210 format_sort: How to sort the video formats. see "Sorting Formats"
211 for more details.
212 format_sort_force: Force the given format_sort. see "Sorting Formats"
213 for more details.
214 allow_multiple_video_streams: Allow multiple video streams to be merged
215 into a single file
216 allow_multiple_audio_streams: Allow multiple audio streams to be merged
217 into a single file
0ba692ac 218 check_formats: Whether to test if the formats are downloadable.
219 Can be True (check all), False (check none)
220 or None (check only if requested by extractor)
4524baf0 221 paths: Dictionary of output paths. The allowed keys are 'home'
222 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 223 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 224 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 225 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
226 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
227 restrictfilenames: Do not allow "&" and spaces in file names
228 trim_file_name: Limit length of filename (extension excluded)
4524baf0 229 windowsfilenames: Force the filenames to be windows compatible
b1940459 230 ignoreerrors: Do not stop on download/postprocessing errors.
231 Can be 'only_download' to ignore only download errors.
232 Default is 'only_download' for CLI, but False for API
26e2805c 233 skip_playlist_after_errors: Number of allowed failures until the rest of
234 the playlist is skipped
d22dec74 235 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 236 overwrites: Overwrite all video and metadata files if True,
237 overwrite only non-video files if None
238 and don't overwrite any file if False
34488702 239 For compatibility with youtube-dl,
240 "nooverwrites" may also be used instead
8222d8de
JMF
241 playliststart: Playlist item to start at.
242 playlistend: Playlist item to end at.
c14e88f0 243 playlist_items: Specific indices of playlist to download.
ff815fe6 244 playlistreverse: Download playlist items in reverse order.
75822ca7 245 playlistrandom: Download playlist items in random order.
8222d8de
JMF
246 matchtitle: Download only matching titles.
247 rejecttitle: Reject downloads for matching titles.
8bf9319e 248 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
249 logtostderr: Log messages to stderr instead of stdout.
250 writedescription: Write the video description to a .description file
251 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 252 clean_infojson: Remove private fields from the infojson
34488702 253 getcomments: Extract video comments. This will not be written to disk
06167fbb 254 unless writeinfojson is also given
1fb07d10 255 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 256 writethumbnail: Write the thumbnail image to a file
c25228e5 257 allow_playlist_files: Whether to write playlists' description, infojson etc
258 also to disk when using the 'write*' options
ec82d85a 259 write_all_thumbnails: Write all thumbnail formats to files
732044af 260 writelink: Write an internet shortcut file, depending on the
261 current platform (.url/.webloc/.desktop)
262 writeurllink: Write a Windows internet shortcut file (.url)
263 writewebloclink: Write a macOS internet shortcut file (.webloc)
264 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 265 writesubtitles: Write the video subtitles to a file
741dd8ea 266 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 267 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 268 Downloads all the subtitles of the video
0b7f3118 269 (requires writesubtitles or writeautomaticsub)
8222d8de 270 listsubtitles: Lists all available subtitles for the video
a504ced0 271 subtitlesformat: The format code for subtitles
c32b0aab 272 subtitleslangs: List of languages of the subtitles to download (can be regex).
273 The list may contain "all" to refer to all the available
274 subtitles. The language can be prefixed with a "-" to
275 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
276 keepvideo: Keep the video file after post-processing
277 daterange: A DateRange object, download only if the upload_date is in the range.
278 skip_download: Skip the actual download of the video file
c35f9e72 279 cachedir: Location of the cache files in the filesystem.
a0e07d31 280 False to disable filesystem cache.
47192f92 281 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
282 age_limit: An integer representing the user's age in years.
283 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
284 min_views: An integer representing the minimum view count the video
285 must have in order to not be skipped.
286 Videos without view count information are always
287 downloaded. None for no limit.
288 max_views: An integer representing the maximum view count.
289 Videos that are more popular than that are not
290 downloaded.
291 Videos without view count information are always
292 downloaded. None for no limit.
293 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
294 Videos already present in the file are not downloaded
295 again.
8a51f564 296 break_on_existing: Stop the download process after attempting to download a
297 file that is in the archive.
298 break_on_reject: Stop the download process when encountering a video that
299 has been filtered out.
300 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
301 cookiesfrombrowser: A tuple containing the name of the browser and the profile
302 name/path from where cookies are loaded.
303 Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 304 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
305 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
306 At the moment, this is only supported by YouTube.
a1ee09e8 307 proxy: URL of the proxy server to use
38cce791 308 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 309 on geo-restricted sites.
e344693b 310 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
311 bidi_workaround: Work around buggy terminals without bidirectional text
312 support, using fribidi
a0ddb8a2 313 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 314 include_ads: Download ads as well
04b4d394
PH
315 default_search: Prepend this string if an input url is not valid.
316 'auto' for elaborate guessing
62fec3b2 317 encoding: Use this encoding instead of the system-specified.
e8ee972c 318 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
319 Pass in 'in_playlist' to only show this behavior for
320 playlist items.
4f026faf 321 postprocessors: A list of dictionaries, each with an entry
71b640cc 322 * key: The name of the postprocessor. See
7a5c1cfe 323 yt_dlp/postprocessor/__init__.py for a list.
56d868db 324 * when: When to run the postprocessor. Can be one of
325 pre_process|before_dl|post_process|after_move.
326 Assumed to be 'post_process' if not given
ab8e5e51
AM
327 post_hooks: A list of functions that get called as the final step
328 for each video file, after all postprocessors have been
329 called. The filename will be passed as the only argument.
71b640cc
PH
330 progress_hooks: A list of functions that get called on download
331 progress, with a dictionary with the entries
5cda4eda 332 * status: One of "downloading", "error", or "finished".
ee69b99a 333 Check this first and ignore unknown values.
3ba7740d 334 * info_dict: The extracted info_dict
71b640cc 335
5cda4eda 336 If status is one of "downloading", or "finished", the
ee69b99a
PH
337 following properties may also be present:
338 * filename: The final filename (always present)
5cda4eda 339 * tmpfilename: The filename we're currently writing to
71b640cc
PH
340 * downloaded_bytes: Bytes on disk
341 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
342 * total_bytes_estimate: Guess of the eventual file size,
343 None if unavailable.
344 * elapsed: The number of seconds since download started.
71b640cc
PH
345 * eta: The estimated time in seconds, None if unknown
346 * speed: The download speed in bytes/second, None if
347 unknown
5cda4eda
PH
348 * fragment_index: The counter of the currently
349 downloaded video fragment.
350 * fragment_count: The number of fragments (= individual
351 files that will be merged)
71b640cc
PH
352
353 Progress hooks are guaranteed to be called at least once
354 (with status "finished") if the download is successful.
45598f15 355 merge_output_format: Extension to use when merging formats.
6b591b29 356 final_ext: Expected final extension; used to detect when the file was
357 already downloaded and converted. "merge_output_format" is
358 replaced by this extension when given
6271f1ca
PH
359 fixup: Automatically correct known faults of the file.
360 One of:
361 - "never": do nothing
362 - "warn": only emit a warning
363 - "detect_or_warn": check whether we can do anything
62cd676c 364 about it, warn otherwise (default)
504f20dd 365 source_address: Client-side IP address to bind to.
6ec6cb4e 366 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 367 yt-dlp servers for debugging. (BROKEN)
1cf376f5 368 sleep_interval_requests: Number of seconds to sleep between requests
369 during extraction
7aa589a5
S
370 sleep_interval: Number of seconds to sleep before each download when
371 used alone or a lower bound of a range for randomized
372 sleep before each download (minimum possible number
373 of seconds to sleep) when used along with
374 max_sleep_interval.
375 max_sleep_interval:Upper bound of a range for randomized sleep before each
376 download (maximum possible number of seconds to sleep).
377 Must only be used along with sleep_interval.
378 Actual sleep time will be a random float from range
379 [sleep_interval; max_sleep_interval].
1cf376f5 380 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
381 listformats: Print an overview of available video formats and exit.
382 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
383 match_filter: A function that gets called with the info_dict of
384 every video.
385 If it returns a message, the video is ignored.
386 If it returns None, the video is downloaded.
387 match_filter_func in utils.py is one example for this.
7e5db8c9 388 no_color: Do not emit color codes in output.
0a840f58 389 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 390 HTTP header
0a840f58 391 geo_bypass_country:
773f291d
S
392 Two-letter ISO 3166-2 country code that will be used for
393 explicit geographic restriction bypassing via faking
504f20dd 394 X-Forwarded-For HTTP header
5f95927a
S
395 geo_bypass_ip_block:
396 IP range in CIDR notation that will be used similarly to
504f20dd 397 geo_bypass_country
71b640cc 398
85729c51 399 The following options determine which downloader is picked:
52a8a1e1 400 external_downloader: A dictionary of protocol keys and the executable of the
401 external downloader to use for it. The allowed protocols
402 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
403 Set the value to 'native' to use the native downloader
404 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
405 or {'m3u8': 'ffmpeg'} instead.
406 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
407 if True, otherwise use ffmpeg/avconv if False, otherwise
408 use downloader suggested by extractor if None.
53ed7066 409 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 410 The following options do not work when used through the API:
411 filename, abort-on-error, multistreams, no-live-chat,
b51d2ae3 412 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 413 Refer __init__.py for their implementation
fe7e0c98 414
8222d8de 415 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 416 the downloader (see yt_dlp/downloader/common.py):
51d9739f 417 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
418 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
419 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67
JMF
420
421 The following options are used by the post processors:
d4a24f40 422 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 423 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
424 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
425 to the binary or its containing directory.
43820c03 426 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 427 and a list of additional command-line arguments for the
428 postprocessor/executable. The dict can also have "PP+EXE" keys
429 which are used when the given exe is used by the given PP.
430 Use 'default' as the name for arguments to be passed to all PP
431 For compatibility with youtube-dl, a single list of args
432 can also be used
e409895f 433
434 The following options are used by the extractors:
62bff2c1 435 extractor_retries: Number of times to retry for known errors
436 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 437 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 438 discontinuities such as ad breaks (default: False)
5d3a0e79 439 extractor_args: A dictionary of arguments to be passed to the extractors.
440 See "EXTRACTOR ARGUMENTS" for details.
441 Eg: {'youtube': {'skip': ['dash', 'hls']}}
442 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
443 If True (default), DASH manifests and related
62bff2c1 444 data will be downloaded and processed by extractor.
445 You can reduce network I/O by disabling it if you don't
446 care about DASH. (only for youtube)
5d3a0e79 447 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
448 If True (default), HLS manifests and related
62bff2c1 449 data will be downloaded and processed by extractor.
450 You can reduce network I/O by disabling it if you don't
451 care about HLS. (only for youtube)
8222d8de
JMF
452 """
453
c9969434
S
# Field names grouped as numeric (going by the constant's name; the code
# that consumes this set is outside this part of the file).
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
}

# Class-level defaults for the per-object state
params = None
_ies = {}
_pps = {
    'pre_process': [],
    'before_dl': [],
    'after_move': [],
    'post_process': [],
}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
475
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:    Dictionary of options (see the class docstring). Its entries
               are copied into self.params on top of the built-in defaults.
               None is treated as an empty dictionary.
    auto_init: If true, print the debug header and register the default
               info extractors.
    """
    if params is None:
        params = {}
    self._ies = {}
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Give every object its own playlist bookkeeping; otherwise the mutable
    # class-level set would be shared between all YoutubeDL objects
    self._playlist_level = 0
    self._playlist_urls = set()
    # A plain truth test (rather than indexing a list with the option value)
    # also copes with an explicit None
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            'You have asked for unplayable formats to be listed/downloaded. '
            'This is a developer option intended for debugging. '
            'If you experience any issues while using this option, DO NOT open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given as well
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    elif self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    if params.get('bidi_workaround', False):
        master = slave = None
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # The workaround could not be set up: release the pty pair (if it
            # was opened) instead of leaking both descriptors
            for fd in (master, slave):
                if fd is None:
                    continue
                try:
                    os.close(fd)
                except OSError:
                    pass  # best-effort cleanup; the original error matters more
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified

        Adds every stripped line of the file fn to self.archive.
        Returns False if fn is None or the file does not exist, else True.
        """
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is fine; anything else is a real error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's dictionary is left untouched
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
617
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a video ID starting with a dash

    Every entry of argv made of a dash followed by exactly ten ID
    characters is reported, together with a corrected command line that
    moves those entries behind a "--" separator.
    """
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspicious = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not suspicious:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in suspicious]
    moved_args = [argv[pos] for pos in suspicious]
    correct_argv = ['yt-dlp'] + regular_args + ['--'] + moved_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
633
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Instances are additionally stored in the instance table and get this
    object set as their downloader; classes are only recorded.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
8222d8de 641
def _get_info_extractor_class(self, ie_key):
    """Return the entry registered for ie_key in self._ies.

    On a miss, the extractor is looked up with the module-level
    get_info_extractor(), registered, and then returned.
    """
    registered = self._ies.get(ie_key)
    if registered is not None:
        return registered
    registered = get_info_extractor(ie_key)
    self.add_info_extractor(registered)
    return registered
648
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return an IE instance for the name ie_key. An instance already stored
    in _ies_instances is reused; otherwise a new one is created, added
    to the extractor list and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
660
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every InfoExtractor yielded by gen_extractor_classes to the end of the list
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)
667
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor object to the chain selected by `when`.

    `when` must be one of the keys of self._pps; the postprocessor then
    gets this object set as its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
672
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    hooks = self._post_hooks
    hooks.append(ph)
676
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 680
def _bidi_workaround(self, message):
    """Pass message through the helper process set up for bidi_workaround

    Without an _output_channel attribute the message is returned
    unchanged. Otherwise it is written (UTF-8 encoded, newline terminated)
    to the helper's stdin, and as many lines as were sent are read back
    from the output channel and returned without the final newline.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    pipe = self._output_process.stdin
    pipe.write((message + '\n').encode('utf-8'))
    pipe.flush()
    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode('utf-8'))
    return ''.join(converted)[:-1]
1c088fa8 693
def _write_string(self, message, out=None, only_once=False):
    """Write message to out using the encoding from the 'encoding' param

    With only_once set, a message that has been printed before is
    dropped, and a new one is remembered in self._printed_messages.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 700
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    A configured 'logger' receives the message via debug() instead.
    Quiet messages are only written when 'verbose' is set, and then go
    to the error stream rather than the screen stream.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    terminator = '' if skip_eol else '\n'
    target = self._err_file if quiet else self._screen_file
    self._write_string('%s%s' % (self._bidi_workaround(message), terminator), target)
8222d8de 709
def to_stderr(self, message, only_once=False):
    """
    Print message to stderr

    If a 'logger' param is set the message goes to its error() method
    instead (only_once is not applied in that case).
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = '%s\n' % self._bidi_workaround(message)
    self._write_string(line, self._err_file, only_once=only_once)
8222d8de 717
1e5b9a95
PH
def to_console_title(self, message):
    """
    Set the console title to `message` when the 'consoletitle' param is on.

    On 'nt' the Win32 console API is called through ctypes; elsewhere an
    escape sequence is written to the screen file if TERM is set.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 728
bdde425c
PH
def save_console_title(self):
    """
    Write the escape sequence that saves the current console title.

    Does nothing unless 'consoletitle' is set, 'simulate' is unset, the OS
    name is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
737
def restore_console_title(self):
    """
    Write the escape sequence that restores the saved console title.

    Does nothing unless 'consoletitle' is set, 'simulate' is unset, the OS
    name is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
746
747 def __enter__(self):
748 self.save_console_title()
749 return self
750
751 def __exit__(self, *args):
752 self.restore_console_title()
f89197d7 753
dca08720 754 if self.params.get('cookiefile') is not None:
1bab3437 755 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 756
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    In verbose mode a traceback is printed as well; when none is given it
    is built from the exception being handled or, failing that, from the
    current call stack.
    """

    def wrapped_exc_info():
        # exc_info tuple carried by the exception currently being handled
        # (an `exc_info` attribute on it), or None when there is none
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            return current[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                inner = wrapped_exc_info()
                if inner is not None:
                    parts.append(''.join(traceback.format_exception(*inner)))
                parts.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not self.params.get('ignoreerrors'):
        exc_info = wrapped_exc_info()
        if exc_info is None:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
787
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    is_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=is_quiet)
792
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    When a 'logger' param is set the raw message is passed to its
    warning() method instead; otherwise 'no_warnings' suppresses output.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (not self.params.get('no_color')
                and self._err_file.isatty() and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message), only_once)
8222d8de
JMF
809
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    colorize = (not self.params.get('no_color')
                and self._err_file.isatty() and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
821
def write_debug(self, message, only_once=False):
    '''
    Log debug message or Print message to stderr

    Nothing happens unless 'verbose' is set.  The message is prefixed with
    '[debug] ' and goes to the logger's debug() when a 'logger' param is
    set, otherwise to stderr.
    '''
    if not self.params.get('verbose', False):
        return
    message = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    self.to_stderr(message, only_once)
0760b0a7 831
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """
    Report file has already been fully downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    generic_msg = '[download] The file has already been downloaded'
    try:
        detailed_msg = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed_msg)
    except UnicodeEncodeError:
        self.to_screen(generic_msg)
8222d8de 838
def report_file_delete(self, file_name):
    """
    Report that existing file will be deleted.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    generic_msg = 'Deleting existing file'
    try:
        detailed_msg = 'Deleting existing file %s' % file_name
        self.to_screen(detailed_msg)
    except UnicodeEncodeError:
        self.to_screen(generic_msg)
0c3d0f51 845
def raise_no_formats(self, info, forced=False):
    """
    Signal that `info` has no usable formats.

    Raises ExtractorError when `forced` is true or the
    'ignore_no_formats_error' param is not set; otherwise only a warning
    is reported.  The message depends on info's '__has_drm' entry.
    """
    has_drm = info.get('__has_drm')
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if not forced and expected:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)
855
def parse_outtmpl(self):
    """
    Return the 'outtmpl' param as a dict of templates.

    A non-dict value becomes the 'default' template.  Every key of
    DEFAULT_OUTTMPL whose entry is missing or falsy is filled in with the
    default (the dict is updated in place).  A warning is reported for each
    template that is a bytes object.
    """
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        outtmpl_dict = {'default': outtmpl_dict}
    for tmpl_name, fallback in DEFAULT_OUTTMPL.items():
        if not outtmpl_dict.get(tmpl_name):
            outtmpl_dict[tmpl_name] = fallback
    for tmpl in outtmpl_dict.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return outtmpl_dict
869
def get_output_path(self, dir_type='', filename=None):
    """
    Join the 'home' path, the path configured for `dir_type` (if any) and
    `filename` from the 'paths' param, and return the sanitized result.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    if dir_type:
        type_dir = expand_path(paths.get(dir_type, '').strip())
    else:
        type_dir = ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
884
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on `outtmpl` while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', '%' + sep + '%')
    guarded = guarded.replace('$$', '$' + sep + '$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(guarded)
    return expanded.replace(sep, '')
899
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def add_percent(mobj):
        # Matches without a key get an extra leading '%'
        extra = '' if mobj.group('has_key') else '%'
        return extra + mobj.group(0)

    return re.sub(pattern, add_percent, outtmpl)
907
@classmethod
def validate_outtmpl(cls, outtmpl):
    '''
    Check that `outtmpl` can be %-formatted.

    @return None or Exception object
    '''
    def as_string_conversion(mobj):
        # Swap the custom conversion character for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(
        STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
        as_string_conversion,
        cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None
920
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """
    Make the template and info_dict suitable for substitution:
    ydl.escape_outtmpl(outtmpl) % info_dict

    Every template field in `outtmpl` is evaluated against a copy of
    `info_dict` (object traversal, negation, +/- maths, strftime formatting,
    alternate fields and default values) and replaced by a plain
    %-format field whose key is stored in the returned dict.

    @param outtmpl    The output template string
    @param info_dict  The info dict; 'epoch' is set on it if missing
    @param sanitize   Optional callable (field_name, value) applied to
                      values formatted with 'c', 's' or 'r'
    @return           (rewritten template, dict with the values to substitute)
    """
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
    for key in ('__original_infodict', '__postprocessors'):
        info_dict.pop(key, None)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The alternation is grouped so that a preceding operator applies to both
    # alternatives, and the decimal point is escaped so it only matches '.'
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|)]+)?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # `math_func` holds the pending operator; tokens alternate
            # between an operator and the operand it applies to
            math_func = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_func else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_func is None:
                    math_func = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal: treat the operand as a field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_func(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_func = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return f'%{outer_mobj.group(0)}'
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
        value, default = None, na
        # Walk through the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
            # Items are converted with str() so that non-string items can be joined
            value, fmt = delim.join(map(str, variadic(value))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value, fmt = compat_shlex_quote(str(value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            opts = outer_mobj.group('conversion') or ''
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                value), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitize(initial_field, value)

        # The key stored in TMPL_DICT is made unique per (field, format) pair
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1074
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Evaluate the output template registered for `tmpl_type` (falling back
    to 'default') against `info_dict` and return the resulting filename.

    A ValueError raised along the way is reported through report_error()
    and None is returned instead.
    """
    try:
        def sanitize(k, v):
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))

        template = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        template, template_dict = self.prepare_outtmpl(template, info_dict, sanitize)
        template = self.escape_outtmpl(self._outtmpl_expandpath(template))
        filename = template % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            pieces = filename.split('.')
            # Keep the trimmed first piece plus the last (and, when there
            # are more than two pieces, the second to last) piece
            sub_ext = pieces[-2] if len(pieces) > 2 else ''
            kept = [pieces[0][:trim_file_name], sub_ext, pieces[-1]]
            filename = '.'.join(part for part in kept if part)

        return filename
    except ValueError as err:
        self.report_error('Error in output template: %s (encoding: %r)' % (str(err), preferredencoding()))
        return None
1104
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """
    Generate the output filename.

    The name is built by _prepare_filename() for the template type
    `dir_type` (or 'default') and then passed through get_output_path().
    '-' and empty/None results are returned as they are.

    With warn=True and a 'paths' param configured, a warning is reported
    (once) when the paths cannot apply, i.e. when writing to stdout or
    when the template already produced an absolute path.
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename may be None/empty when the template failed; os.path.isabs
        # would raise TypeError on None, so only test real names
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
0202b52a 1121
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """
    Returns None if the file should be downloaded

    Otherwise the reason for skipping it is returned (and printed unless
    `silent`).  When the matching 'break_on_existing'/'break_on_reject'
    param is set, ExistingVideoReached/RejectedVideoReached is raised.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        params = self.params
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = params.get('match_filter')
        if match_filter is None:
            return None
        try:
            ret = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            ret = None if incomplete else match_filter(info_dict)
        if ret is not None:
            return ret
        return None

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1178
b6c45014
JMF
1179 @staticmethod
1180 def add_extra_info(info_dict, extra_info):
1181 '''Set the keys from extra_info in info dict if they are missing'''
1182 for key, value in extra_info.items():
1183 info_dict.setdefault(key, value)
1184
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information for `url` with the first suitable extractor
    and return its result.  None is returned when the URL is already
    recorded in the download archive or when no extractor is suitable
    (an error is reported in the latter case).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        candidates = {ie_key: self._get_info_extractor_class(ie_key)}
    else:
        candidates = self._ies

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           key, temp_id))
            # Already archived: stop without reporting an error
            return
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

    # Only reached when no extractor accepted the URL
    self.report_error('no suitable InfoExtractor for URL %s' % url)
1229
def __handle_extraction_exceptions(func):
    """
    Decorator for methods taking `self` as first argument: runs `func`
    and turns extraction errors into report_error()/report_warning()
    calls.  A ThrottledDownload triggers another attempt; the playlist
    control exceptions are always re-raised; any other exception is only
    swallowed (and reported) when the 'ignoreerrors' param is set.
    """

    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as err:
            lines = [err.msg]
            if err.countries:
                lines.append('This video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, err.countries)))
            lines.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error('\n'.join(lines))
        except ExtractorError as err:  # An error we somewhat expected
            self.report_error(compat_str(err), err.format_traceback())
        except ThrottledDownload:
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
            raise
        except Exception as err:
            if not self.params.get('ignoreerrors'):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1256
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """
    Run extractor `ie` on `url`, add the default extra info to its result
    and, when `process` is true, pass the result to process_ie_result().
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1275
def add_default_extra_info(self, ie_result, ie, url):
    """
    Fill in the URL-derived fields (when `url` is given) and the
    extractor-derived fields (when `ie` is given) that are still missing
    from `ie_result`.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1288
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    The handling depends on the result's '_type' ('video' when missing):
    'video', 'url', 'url_transparent', 'playlist'/'multi_video' or
    'compat_list'; any other type raises an Exception.  None is returned
    for a playlist whose 'webpage_url' is already being processed.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the URL, only print/record it
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesised so that the id is used when there is no title
            # ('%' binds tighter than 'or')
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited playlists once the outermost one is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1415
def _ensure_dir_exists(self, path):
    """Call make_dir() for `path`, with report_error as its error callback, and return its result."""
    on_error = self.report_error
    return make_dir(path, on_error)
1418
def __process_playlist(self, ie_result, download):
    """Select, optionally document, and process the entries of a playlist.

    The entries to handle are picked according to the 'playliststart',
    'playlistend' and 'playlist_items' params.  When 'allow_playlist_files'
    is enabled, playlist-level info-json, thumbnails and description are
    written first.  Each selected entry is then run through
    __process_iterable_entry() and the results replace ie_result['entries'].

    Returns ie_result, or None when a playlist-level file could not be
    prepared/written.  Raises EntryNotInPlaylist when 'entries' is missing
    or when a requested entry cannot be found in an incomplete playlist.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Place every entry at its 1-based playlist position and pad the gaps with None.
            # NOTE: max(indexes), not max(*indexes) - the latter raises TypeError
            # when only a single index was requested
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # 'a-b' segments expand to the inclusive range, anything else is a single index
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    # The remaining %d is filled in with n_entries once the selection is known
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

    if isinstance(ie_entries, list):
        def get_entry(i):
            return ie_entries[i - 1]
    else:
        if not isinstance(ie_entries, PagedList):
            ie_entries = LazyList(ie_entries)

        def get_entry(i):
            # Indexing a non-list container goes through the extraction-exception wrapper
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries = []
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        # A missing entry is kept as None here so positions stay aligned with `items`
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    # A falsy 'skip_playlist_after_errors' means "never give up"
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # Any non-None answer from _match_entry means the entry is skipped
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1603
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Feed a single playlist entry to process_ie_result() and return its result.

    The call is wrapped by the __handle_extraction_exceptions decorator.
    """
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1608
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """Compile `filter_spec` into a predicate over format dicts.

    A spec is tried first as a numeric comparison (e.g. 'height>=720') and
    then as a string comparison (e.g. 'ext!=mp4').  The returned callable
    takes a format dict; when the dict lacks the key it returns the matched
    '?' marker (truthy) or None, otherwise the result of the comparison.

    Raises SyntaxError for an unparsable spec and ValueError for a numeric
    value that can be read neither as an int nor as a file size.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    match = numeric_rex.fullmatch(filter_spec)

    if match:
        raw_value = match.group('value')
        try:
            wanted = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and without a trailing 'B'
            wanted = parse_filesize(raw_value)
            if wanted is None:
                wanted = parse_filesize(raw_value + 'B')
            if wanted is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = numeric_ops[match.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[a-zA-Z0-9._-]+)\s*
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        match = string_rex.fullmatch(filter_spec)
        if not match:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        wanted = match.group('value')
        plain_op = string_ops[match.group('op')]
        if match.group('negation'):
            compare = lambda attr, value: not plain_op(attr, value)
        else:
            compare = plain_op

    field_name = match.group('key')
    when_missing = match.group('none_inclusive')

    def _filter(f):
        present = f.get(field_name)
        if present is None:
            return when_missing
        return compare(present, wanted)
    return _filter
1669
def _default_format_spec(self, info_dict, download=True):
    """Pick the format selector string to use when none was given.

    'best/bestvideo+bestaudio' is chosen when actually downloading (not
    simulating) and either merging is unavailable, the video is live, or
    the default output template is '-'.  Otherwise the result depends on
    'allow_multiple_audio_streams' and the 'format-spec' compat option.
    """
    params = self.params

    def merging_possible():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    prefer_best = False
    if not params.get('simulate') and download:
        # Only consulted for a real download; order of the checks matters (short-circuit)
        prefer_best = bool(
            not merging_possible()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'

    if (params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in params.get('compat_opts', [])):
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
0017d9ad 1692
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    `format_spec` is tokenized and parsed into a tree of FormatSelector
    tuples (',' lists, '/' fallbacks, '+' merges, '(...)' groups and single
    atoms with optional '[...]' filters).  The returned callable takes a
    context dict (it reads ctx['formats'] and ctx['incomplete_formats'])
    and produces the selected format dicts.

    Raises SyntaxError when the expression cannot be parsed.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect the raw text of everything up to the closing ']'
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of pop()-ing from the list being iterated:
            # removing during enumeration made the loop skip the element that
            # followed every removed one, letting surplus streams slip through
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    # Not rejected for either stream kind
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Without 'check_formats' every format passes through untouched
        if not check_formats:
            yield from formats
            return
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            temp_file = tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filters are applied to a private copy of the context
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 2023
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP header dict for `info_dict`.

    Starts from a copy of std_headers, layers info_dict['http_headers'] on
    top, sets 'Cookie' from _calc_cookies() when there is one, and adds
    'X-Forwarded-For' from info_dict['__x_forwarded_for_ip'] unless that
    header is already present.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2041
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookiejar attaches to a request for info_dict['url']."""
    request = sanitized_Request(info_dict['url'])
    # The jar writes its cookies into the request; read them back as a header value
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
e5660ee6 2046
def _sanitize_thumbnails(self, info_dict):
    """Normalize info_dict['thumbnails'] in place.

    A lone 'thumbnail' URL is promoted to a one-element list.  The list is
    sorted by (preference, width, height, id, url), every item gets an
    'id', a 'resolution' (when width and height are known) and a sanitized
    'url'.  Unless 'check_formats' is exactly False, the stored value is a
    LazyList that drops thumbnails failing the reachability test.
    Nothing happens when there are no thumbnails.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs is None:
        lone_url = info_dict.get('thumbnail')
        if lone_url:
            thumbs = [{'url': lone_url}]
            info_dict['thumbnails'] = thumbs
    if not thumbs:
        return

    def _or_default(value, default):
        return default if value is None else value

    def _sort_key(thumb):
        return (
            _or_default(thumb.get('preference'), -1),
            _or_default(thumb.get('width'), -1),
            _or_default(thumb.get('height'), -1),
            _or_default(thumb.get('id'), ''),
            thumb.get('url'))

    thumbs.sort(key=_sort_key)

    def thumbnail_tester():
        # With 'check_formats' on, every thumbnail is tested and progress is shown;
        # otherwise only those flagged '_test_url' are tested, logged via write_debug
        test_all = bool(self.params.get('check_formats'))
        if test_all:
            to_screen = lambda msg: self.to_screen(f'[info] {msg}')
        else:
            to_screen = self.write_debug

        def test_thumbnail(t):
            if not (test_all or t.get('_test_url')):
                return True
            to_screen('Testing thumbnail %s' % t['id'])
            try:
                self.urlopen(HEADRequest(t['url']))
            except network_exceptions as err:
                to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                    t['id'], t['url'], error_to_compat_str(err)))
                return False
            return True

        return test_thumbnail

    for index, thumb in enumerate(thumbs):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is False:
        info_dict['thumbnails'] = thumbs
    else:
        # Tested lazily from the end of the sorted list, then flipped back
        info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbs[::-1])).reverse()
bc516a3f 2094
dd82ffea
JMF
2095 def process_video_result(self, info_dict, download=True):
2096 assert info_dict.get('_type', 'video') == 'video'
2097
bec1fad2
PH
2098 if 'id' not in info_dict:
2099 raise ExtractorError('Missing "id" field in extractor result')
2100 if 'title' not in info_dict:
1151c407 2101 raise ExtractorError('Missing "title" field in extractor result',
2102 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2103
c9969434
S
2104 def report_force_conversion(field, field_not, conversion):
2105 self.report_warning(
2106 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2107 % (field, field_not, conversion))
2108
2109 def sanitize_string_field(info, string_field):
2110 field = info.get(string_field)
2111 if field is None or isinstance(field, compat_str):
2112 return
2113 report_force_conversion(string_field, 'a string', 'string')
2114 info[string_field] = compat_str(field)
2115
2116 def sanitize_numeric_fields(info):
2117 for numeric_field in self._NUMERIC_FIELDS:
2118 field = info.get(numeric_field)
2119 if field is None or isinstance(field, compat_numeric_types):
2120 continue
2121 report_force_conversion(numeric_field, 'numeric', 'int')
2122 info[numeric_field] = int_or_none(field)
2123
2124 sanitize_string_field(info_dict, 'id')
2125 sanitize_numeric_fields(info_dict)
be6217b2 2126
dd82ffea
JMF
2127 if 'playlist' not in info_dict:
2128 # It isn't part of a playlist
2129 info_dict['playlist'] = None
2130 info_dict['playlist_index'] = None
2131
bc516a3f 2132 self._sanitize_thumbnails(info_dict)
d5519808 2133
536a55da 2134 thumbnail = info_dict.get('thumbnail')
bc516a3f 2135 thumbnails = info_dict.get('thumbnails')
536a55da
S
2136 if thumbnail:
2137 info_dict['thumbnail'] = sanitize_url(thumbnail)
2138 elif thumbnails:
d5519808
PH
2139 info_dict['thumbnail'] = thumbnails[-1]['url']
2140
ae30b840 2141 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2142 info_dict['display_id'] = info_dict['id']
2143
10db0d2f 2144 for ts_key, date_key in (
2145 ('timestamp', 'upload_date'),
2146 ('release_timestamp', 'release_date'),
2147 ):
2148 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2149 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2150 # see http://bugs.python.org/issue1646728)
2151 try:
2152 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2153 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2154 except (ValueError, OverflowError, OSError):
2155 pass
9d2ecdbc 2156
ae30b840 2157 live_keys = ('is_live', 'was_live')
2158 live_status = info_dict.get('live_status')
2159 if live_status is None:
2160 for key in live_keys:
2161 if info_dict.get(key) is False:
2162 continue
2163 if info_dict.get(key):
2164 live_status = key
2165 break
2166 if all(info_dict.get(key) is False for key in live_keys):
2167 live_status = 'not_live'
2168 if live_status:
2169 info_dict['live_status'] = live_status
2170 for key in live_keys:
2171 if info_dict.get(key) is None:
2172 info_dict[key] = (live_status == key)
2173
33d2fc2f
S
2174 # Auto generate title fields corresponding to the *_number fields when missing
2175 # in order to always have clean titles. This is very common for TV series.
2176 for field in ('chapter', 'season', 'episode'):
2177 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2178 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2179
05108a49
S
2180 for cc_kind in ('subtitles', 'automatic_captions'):
2181 cc = info_dict.get(cc_kind)
2182 if cc:
2183 for _, subtitle in cc.items():
2184 for subtitle_format in subtitle:
2185 if subtitle_format.get('url'):
2186 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2187 if subtitle_format.get('ext') is None:
2188 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2189
2190 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2191 subtitles = info_dict.get('subtitles')
4bba3716 2192
360e1ca5 2193 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2194 info_dict['id'], subtitles, automatic_captions)
a504ced0 2195
dd82ffea
JMF
2196 # We now pick which formats have to be downloaded
2197 if info_dict.get('formats') is None:
2198 # There's only one format available
2199 formats = [info_dict]
2200 else:
2201 formats = info_dict['formats']
2202
e0493e90 2203 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2204 if not self.params.get('allow_unplayable_formats'):
2205 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2206
db95dc13 2207 if not formats:
1151c407 2208 self.raise_no_formats(info_dict)
db95dc13 2209
73af5cc8
S
2210 def is_wellformed(f):
2211 url = f.get('url')
a5ac0c47 2212 if not url:
73af5cc8
S
2213 self.report_warning(
2214 '"url" field is missing or empty - skipping format, '
2215 'there is an error in extractor')
a5ac0c47
S
2216 return False
2217 if isinstance(url, bytes):
2218 sanitize_string_field(f, 'url')
2219 return True
73af5cc8
S
2220
2221 # Filter out malformed formats for better extraction robustness
2222 formats = list(filter(is_wellformed, formats))
2223
181c7053
S
2224 formats_dict = {}
2225
dd82ffea 2226 # We check that all the formats have the format and format_id fields
db95dc13 2227 for i, format in enumerate(formats):
c9969434
S
2228 sanitize_string_field(format, 'format_id')
2229 sanitize_numeric_fields(format)
dcf77cf1 2230 format['url'] = sanitize_url(format['url'])
e74e3b63 2231 if not format.get('format_id'):
8016c922 2232 format['format_id'] = compat_str(i)
e2effb08
S
2233 else:
2234 # Sanitize format_id from characters used in format selector expression
ec85ded8 2235 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2236 format_id = format['format_id']
2237 if format_id not in formats_dict:
2238 formats_dict[format_id] = []
2239 formats_dict[format_id].append(format)
2240
2241 # Make sure all formats have unique format_id
2242 for format_id, ambiguous_formats in formats_dict.items():
2243 if len(ambiguous_formats) > 1:
2244 for i, format in enumerate(ambiguous_formats):
2245 format['format_id'] = '%s-%d' % (format_id, i)
2246
2247 for i, format in enumerate(formats):
8c51aa65 2248 if format.get('format') is None:
6febd1c1 2249 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2250 id=format['format_id'],
2251 res=self.format_resolution(format),
b868936c 2252 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2253 )
c1002e96 2254 # Automatically determine file extension if missing
5b1d8575 2255 if format.get('ext') is None:
cce929ea 2256 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2257 # Automatically determine protocol if missing (useful for format
2258 # selection purposes)
6f0be937 2259 if format.get('protocol') is None:
b5559424 2260 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2261 # Add HTTP headers, so that external programs can use them from the
2262 # json output
2263 full_format_info = info_dict.copy()
2264 full_format_info.update(format)
2265 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2266 # Remove private housekeeping stuff
2267 if '__x_forwarded_for_ip' in info_dict:
2268 del info_dict['__x_forwarded_for_ip']
dd82ffea 2269
4bcc7bd1 2270 # TODO Central sorting goes here
99e206d5 2271
88acdbc2 2272 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2273 # only set the 'formats' fields if the original info_dict list them
2274 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2275 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2276 # which can't be exported to json
b3d9ef88 2277 info_dict['formats'] = formats
4ec82a72 2278
2279 info_dict, _ = self.pre_process(info_dict)
2280
b7b04c78 2281 if self.params.get('list_thumbnails'):
2282 self.list_thumbnails(info_dict)
2283 if self.params.get('listformats'):
86c66b2d 2284 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2285 self.to_screen('%s has no formats' % info_dict['id'])
2286 else:
2287 self.list_formats(info_dict)
b7b04c78 2288 if self.params.get('listsubtitles'):
2289 if 'automatic_captions' in info_dict:
2290 self.list_subtitles(
2291 info_dict['id'], automatic_captions, 'automatic captions')
2292 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2293 list_only = self.params.get('simulate') is None and (
2294 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2295 if list_only:
b7b04c78 2296 # Without this printing, -F --print-json will not work
169dbde9 2297 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2298 return
2299
187986a8 2300 format_selector = self.format_selector
2301 if format_selector is None:
0017d9ad 2302 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2303 self.write_debug('Default format spec: %s' % req_format)
187986a8 2304 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2305
2306 # While in format selection we may need to have an access to the original
2307 # format set in order to calculate some metrics or do some processing.
2308 # For now we need to be able to guess whether original formats provided
2309 # by extractor are incomplete or not (i.e. whether extractor provides only
2310 # video-only or audio-only formats) for proper formats selection for
2311 # extractors with such incomplete formats (see
067aa17e 2312 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2313 # Since formats may be filtered during format selection and may not match
2314 # the original formats the results may be incorrect. Thus original formats
2315 # or pre-calculated metrics should be passed to format selection routines
2316 # as well.
2317 # We will pass a context object containing all necessary additional data
2318 # instead of just formats.
2319 # This fixes incorrect format selection issue (see
067aa17e 2320 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2321 incomplete_formats = (
317f7ab6 2322 # All formats are video-only or
3089bc74 2323 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2324 # all formats are audio-only
3089bc74 2325 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2326
2327 ctx = {
2328 'formats': formats,
2329 'incomplete_formats': incomplete_formats,
2330 }
2331
2332 formats_to_download = list(format_selector(ctx))
dd82ffea 2333 if not formats_to_download:
b7da73eb 2334 if not self.params.get('ignore_no_formats_error'):
1151c407 2335 raise ExtractorError('Requested format is not available', expected=True,
2336 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2337 else:
2338 self.report_warning('Requested format is not available')
4513a41a
A
2339 # Process what we can, even without any available formats.
2340 self.process_info(dict(info_dict))
b7da73eb 2341 elif download:
2342 self.to_screen(
07cce701 2343 '[info] %s: Downloading %d format(s): %s' % (
2344 info_dict['id'], len(formats_to_download),
2345 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2346 for fmt in formats_to_download:
dd82ffea 2347 new_info = dict(info_dict)
4ec82a72 2348 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2349 new_info['__original_infodict'] = info_dict
b7da73eb 2350 new_info.update(fmt)
dd82ffea
JMF
2351 self.process_info(new_info)
2352 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2353 if formats_to_download:
2354 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2355 return info_dict
2356
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Regular subtitles are considered only when the "writesubtitles" param is
    set, automatic captions only when "writeautomaticsub" is set; a regular
    subtitle always wins over an automatic caption of the same language.

    Returns a dict mapping each selected language to the chosen subtitle
    format dict, or None when nothing was requested or nothing is available.
    """
    want_subs = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and want_subs:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, cap_info in automatic_captions.items():
            # Never replace a language already provided by the regular subtitles
            available_subs.setdefault(lang, cap_info)

    if not (want_subs or want_auto) or not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    lang_patterns = self.params.get('subtitleslangs', False)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif lang_patterns:
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for pattern in lang_patterns:
            if pattern == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            # A leading "-" means: drop the matching languages collected so far
            discard = pattern[0] == '-'
            if discard:
                pattern = pattern[1:]
            matches = re.compile(pattern + '$').match
            matched_langs = [lang for lang in all_sub_langs if matches(lang)]
            if discard:
                requested_langs = [
                    lang for lang in requested_langs if lang not in matched_langs]
            else:
                requested_langs.extend(matched_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        # No explicit request and no English: fall back to the first language
        requested_langs = [list(all_sub_langs)[0]]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []

    def pick_format(candidates):
        # Returns (format, True) for the first satisfied preference,
        # or (last candidate, False) when no preference could be satisfied
        for ext in formats_preference:
            if ext == 'best':
                return candidates[-1], True
            same_ext = [cand for cand in candidates if cand['ext'] == ext]
            if same_ext:
                return same_ext[-1], True
        return candidates[-1], False

    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        chosen, preference_met = pick_format(formats)
        if not preference_met:
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2424
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the "force*" params to stdout.

    info_dict is copied first, so the "filename" and "urls" keys added here
    are not written back to the caller's dict. When incomplete is true,
    mandatory fields missing from the info are skipped instead of raising
    KeyError.
    """
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        # "field" names the force* option, "actual_field" the info key to print
        key = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info_dict.get(key) is None:
            return
        self.to_stdout(info_dict[key])

    def print_optional(field):
        if is_forced(field) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    if self.params.get('forceprint') or self.params.get('forcejson'):
        self.post_extract(info_dict)

    for tmpl in self.params.get('forceprint', []):
        # A bare field name is shorthand for the template "%(field)s"
        if re.match(r'\w+$', tmpl):
            tmpl = '%({})s'.format(tmpl)
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
        self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    duration = info_dict.get('duration')
    if self.params.get('forceduration') and duration is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2467
def dl(self, name, info, subtitle=False, test=False):
    """Pick a suitable downloader for info and run it, saving to name.

    With test=True a fixed, self-contained set of downloader params is used
    instead of self.params and no progress hooks are attached.
    Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

    # Log every URL that is about to be fetched (one per requested format, if any)
    format_urls = [fmt['url'] for fmt in info.get('requested_formats', [])]
    if not format_urls:
        format_urls = [info['url']]
    self.write_debug('Invoking downloader on "%s"' % '", "'.join(format_urls))

    # The downloader gets a copy, so the caller's info is left untouched
    download_info = dict(info)
    if download_info.get('http_headers') is None:
        download_info['http_headers'] = self._calc_headers(download_info)
    return fd.download(name, download_info, subtitle)
2497
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the match filters, performs the forced printings and, unless in
    simulate mode, writes the requested side files (description, annotations,
    subtitles, info JSON, thumbnails, internet shortcuts), downloads the
    media and runs the post-processors and post hooks.

    Returns None in every case; failures that are not fatal are reported
    through report_error()/report_warning() and end the processing early.

    Raises MaxDownloadsReached when the "max_downloads" limit has been hit
    and UnavailableVideoError when the download fails with an OS error.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # Any non-None result means the entry was rejected by a filter
    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        sub_copy = sub_info.copy()
                        sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                        self.dl(sub_filename, sub_copy, subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        # A failed subtitle download is not fatal for the video itself
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Returns False only when writing the shortcut failed
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Keep an existing shortcut only when overwriting is disabled, exactly
        # like the description/annotations/subtitles/infojson checks above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_file(*filepaths):
                # Returns the first already existing candidate (also looking for
                # the "final_ext" variant), or None after deleting the existing
                # candidates when overwriting is requested
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:  # All requested formats have same protocol
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif (directly_mergable and get_suitable_downloader(
                        info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if directly_mergable
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue the ffmpeg based fixup post-processors, or only warn,
                # depending on the "fixup" policy
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2919
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    (template-free) output name. Returns the accumulated download return code.
    """
    outtmpl = self.outtmpl_dict['default']
    if len(url_list) > 1 and self.params.get('max_downloads') != 1:
        # A template without any "%" field gives every URL the same file name
        if outtmpl != '-' and '%' not in outtmpl:
            raise SameFileError(outtmpl)

    force_generic = self.params.get('force_generic_extractor', False)
    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloads reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
            raise

        if self.params.get('dump_single_json', False):
            self.post_extract(res)
            self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
2951
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file with download=True.

    The file is read as UTF-8, parsed as JSON and passed through
    sanitize_info (private keys are removed unless the 'clean_infojson'
    param is false). If processing fails with DownloadError,
    EntryNotInPlaylist or ThrottledDownload and the info has a
    'webpage_url', that URL is downloaded instead; otherwise the error
    is re-raised. Returns self._download_retcode on success.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        parsed = json.loads('\n'.join(lines))
        info = self.sanitize_info(parsed, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2968
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Sanitize the infodict for converting to json.

    Returns None unchanged. Otherwise an 'epoch' entry is added to
    info_dict in place if it is missing, and a filtered copy is returned
    in which nested LazyList/list/tuple/set values become plain lists.

    '__original_infodict' is always dropped. With remove_private_keys,
    keys starting with '_' (except '_type'), the keys listed below and
    entries whose value equals None, {}, [], set() or () are dropped too.
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    # Always keep this to facilitate load-info-json.
    # This must be a flat collection of key names: with a trailing comma the
    # list gets wrapped in a tuple, the membership test below never matches
    # and '_type' is stripped together with the other private keys.
    keep_keys = {'_type'}
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        # Recurse into sequences and dicts; everything else is returned as is
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}

    return filter_fn(info_dict)
cb202fd2 2992
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility.

    actually_filter is forwarded as sanitize_info's remove_private_keys.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
2997
def run_pp(self, pp, infodict):
    """Run one postprocessor and handle the files it asks to delete.

    Returns the info dict produced by pp.run(). A PostProcessingError is
    reported and swallowed only when the 'ignoreerrors' param is exactly
    True; otherwise it propagates. With 'keepvideo' the files are
    registered in '__files_to_move' instead of being removed.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(e)
        return infodict

    if not files_to_delete:
        return infodict

    pending_moves = infodict['__files_to_move']
    if self.params.get('keepvideo', False):
        for kept in files_to_delete:
            pending_moves.setdefault(kept, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must no longer be scheduled for moving
        pending_moves.pop(old_filename, None)
    return infodict
5bfa4862 3026
@staticmethod
def post_extract(info_dict):
    """Run the deferred '__post_extractor' callables of an info dict in place.

    For 'playlist' and 'multi_video' results every entry is handled
    recursively. For any other result the callable's returned items are
    merged into the dict and into its '__original_infodict' (when present),
    and the '__post_extractor' key is removed from both.
    """
    def resolve(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            for entry in info.get('entries', {}):
                resolve(entry or {})
            return

        run_deferred = info.get('__post_extractor')
        extra = (run_deferred() if run_deferred else {}).items()
        info.update(extra)
        info.pop('__post_extractor', None)

        original = info.get('__original_infodict') or {}
        original.update(extra)
        original.pop('__post_extractor', None)

    resolve(info_dict or {})
277d6ff5 3045
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under key on a copy of ie_info.

    Returns a tuple (resulting info dict, its '__files_to_move' mapping);
    the mapping is removed from the returned info dict.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
5bfa4862 3052
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Works on a copy of ie_info with 'filepath' set to filename. The
    per-video '__postprocessors' and the 'post_process' ones run first,
    then MoveFilesAfterDownloadPP, then the 'after_move' ones. Returns
    the resulting info dict without its '__files_to_move' entry.
    """
    info = dict(ie_info, filepath=filename)
    info['__files_to_move'] = files_to_move or {}

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    info.pop('__files_to_move')

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info
c1c9a79c 3066
def _make_archive_id(self, info_dict):
    """Return the download-archive id '<extractor in lowercase> <video id>'.

    Returns None when the dict has no id, or when no extractor key is
    given and none can be determined from the 'url' field.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Take the ie_key of the first extractor that accepts the URL
        extractor = next(
            (ie_key for ie_key, ie in self._ies.items() if ie.suitable(url)), None)
        if extractor is None:
            return
    return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3086
def in_download_archive(self, info_dict):
    """Tell whether the video's archive id is contained in self.archive.

    Returns False when the 'download_archive' param is unset or when no
    archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if not archive_id:
        # Incomplete video information
        return False
    return archive_id in self.archive
c1c9a79c
PH
3097
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and to self.archive.

    Does nothing when the 'download_archive' param is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3107
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution label for a format dict.

    'images' or 'audio only' when vcodec is 'none'; otherwise the explicit
    'resolution' field if set, else a label built from width/height
    ('WxH', 'Hp' or 'Wx?'), else default.
    """
    if format.get('vcodec') == 'none':
        return 'images' if format.get('acodec') == 'none' else 'audio only'

    resolution = format.get('resolution')
    if resolution is not None:
        return resolution

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
3125
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-text note describing one format dict.

    The note is assembled left to right from whichever of these fields
    are present: ext (unsupported marker), language, format_note, tbr,
    container, vcodec/vbr, fps, acodec/abr, asr and filesize (or
    filesize_approx).
    """
    note = ''

    def sep():
        # ', ' once something has been written, nothing before that
        return ', ' if note else ''

    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '

    language = fdict.get('language')
    if language:
        if note:
            note += ' '
        note += '[%s] ' % language

    format_note = fdict.get('format_note')
    if format_note is not None:
        note += format_note + ' '

    tbr = fdict.get('tbr')
    if tbr is not None:
        note += '%4dk ' % tbr

    container = fdict.get('container')
    if container is not None:
        note += sep() + '%s container' % container

    vcodec, vbr, abr = fdict.get('vcodec'), fdict.get('vbr'), fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        note += sep() + vcodec
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr

    fps = fdict.get('fps')
    if fps is not None:
        note += sep() + '%sfps' % fps

    acodec = fdict.get('acodec')
    if acodec is not None:
        note += sep() + ('video only' if acodec == 'none' else '%-5s' % acodec)
    elif abr is not None:
        note += sep() + 'audio'
    if abr is not None:
        note += '@%3dk' % abr

    asr = fdict.get('asr')
    if asr is not None:
        note += ' (%5dHz)' % asr

    filesize = fdict.get('filesize')
    filesize_approx = fdict.get('filesize_approx')
    if filesize is not None:
        note += sep() + format_bytes(filesize)
    elif filesize_approx is not None:
        note += sep() + '~' + format_bytes(filesize_approx)
    return note
91c7271a 3181
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'], or info_dict itself as the only format.
    Formats whose 'preference' is below -1000 are left out. The detailed
    table is used unless 'list-formats' is in the 'compat_opts' param or
    the 'listformats_table' param is False.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)

    def is_listed(f):
        preference = f.get('preference')
        return preference is None or preference >= -1000

    def detailed_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            ', '.join(filter(None, (
                'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                format_field(f, 'language', '[%s]'),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
            ))),
        ]

    def compact_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f)]

    if new_format:
        table = [detailed_row(f) for f in formats if is_listed(f)]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        table = [compact_row(f) for f in formats if is_listed(f)]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a
PH
3228
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails.

    When info_dict has no thumbnails (key missing, None or empty) a
    notice is printed instead.
    """
    # 'or []' keeps a missing or None 'thumbnails' entry from crashing list()
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3240
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table (Language, Name, Formats) of the given subtitles.

    subtitles maps a language to a list of format dicts; when it is empty
    a "<video_id> has no <name>" notice is printed instead.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen('Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        pairs = ((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # One shared name is shown once; an all-unknown name not at all
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(['Language', 'Name', 'Formats'], rows, hideEmpty=True))
a504ced0 3258
dca08720
PH
def urlopen(self, req):
    """Start an HTTP download.

    req may be a URL string (wrapped with sanitized_Request first) or a
    ready-made request object; it is opened with self._opener using
    self._socket_timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3264
def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless 'verbose' is set.

    Reports encodings, the yt-dlp version and variant, lazy loading,
    plugin extractors, compatibility options, the git HEAD (best effort),
    the Python version, external program versions, optional libraries and
    the proxy map. With the 'call_home' param the public IP address is
    fetched and reported as well.
    """
    if not self.params.get('verbose'):
        return

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    source = detect_variant()
    self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Best effort only: git may be missing or this may not be a checkout.
        # (The Python 2 era sys.exc_clear() call is gone; on Python 3 it only
        # raised an AttributeError that was swallowed right away.)
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = ', '.join(sorted(filter(None, (
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        has_websockets and 'websockets',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        KEYRING_AVAILABLE and 'keyring',
    )))) or 'none'
    self._write_string('[debug] Optional libraries: %s\n' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # The yt-dl.org latest-version check that used to follow here sat
        # behind an unconditional return and could never run, so it is dropped.
3354
def _setup_opener(self):
    """Create the URL opener used for all requests and store it in self._opener.

    Also sets self._socket_timeout (600 unless the 'socket_timeout' param
    is given) and self.cookiejar (loaded from the 'cookiefile' and
    'cookiesfrombrowser' params). The 'proxy' param, when set, overrides
    the proxies reported by getproxies(); an empty string disables proxies.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 600

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3402
def encode(self, s):
    """Return s encoded with self.get_encoding(); bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    # Either freshly encoded above or already bytes on entry
    return s
3412
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3418
def _write_thumbnails(self, info_dict, filename):  # return the extensions
    """Write the video's thumbnails next to filename.

    Nothing is written unless the 'writethumbnail' or
    'write_all_thumbnails' param is set. Thumbnails are tried from the
    last list entry to the first; without 'write_all_thumbnails' the loop
    stops after the first one that is written or already present. Each
    such thumbnail dict gets its 'filepath' set.

    Returns the list of extensions (with an '<id>.' prefix when several
    thumbnails are written) of the thumbnails now on disk. Failed
    downloads are only reported as warnings.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails = []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    ret = []
    for t in thumbnails[::-1]:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '%s.' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
            ret.append(suffix + thumb_ext)
            t['filepath'] = thumb_filename
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # closing() releases the response even when the copy fails
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], error_to_compat_str(err)))
        if ret and not write_all:
            break
    return ret