]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
Add option `--no-simulate` to not simulate even when `--print` or `--list...` are...
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474 29from string import ascii_letters
e5813e53 30from zipimport import zipimporter
961ea474 31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
7d1eb38a 38 compat_shlex_quote,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b 44)
982ee69a 45from .cookies import load_cookies
8c25f81b 46from .utils import (
eedb7ba5
S
47 age_restricted,
48 args_to_str,
ce02ed60
PH
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
acd69589 52 DEFAULT_OUTTMPL,
ce02ed60 53 determine_ext,
b5559424 54 determine_protocol,
732044af 55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 58 DownloadError,
c0384f22 59 encode_compat_str,
ce02ed60 60 encodeFilename,
498f5606 61 EntryNotInPlaylist,
a06916d9 62 error_to_compat_str,
8b0d7497 63 ExistingVideoReached,
590bc6f6 64 expand_path,
ce02ed60 65 ExtractorError,
e29663c6 66 float_or_none,
02dbf93f 67 format_bytes,
76d321f6 68 format_field,
901130bb 69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
525ef922 71 formatSeconds,
773f291d 72 GeoRestrictedError,
b0249bca 73 HEADRequest,
c9969434 74 int_or_none,
732044af 75 iri_to_uri,
773f291d 76 ISO3166Utils,
56a8fb4f 77 LazyList,
ce02ed60 78 locked_file,
0202b52a 79 make_dir,
dca08720 80 make_HTTPS_handler,
ce02ed60 81 MaxDownloadsReached,
3158150c 82 network_exceptions,
cd6fc19e 83 orderedSet,
a06916d9 84 OUTTMPL_TYPES,
b7ab0590 85 PagedList,
083c9df9 86 parse_filesize,
91410c9b 87 PerRequestProxyHandler,
dca08720 88 platform_name,
eedb7ba5 89 PostProcessingError,
ce02ed60 90 preferredencoding,
eedb7ba5 91 prepend_extension,
a06916d9 92 process_communicate_or_kill,
51fb4995 93 register_socks_protocols,
a06916d9 94 RejectedVideoReached,
cfb56d1a 95 render_table,
eedb7ba5 96 replace_extension,
ce02ed60
PH
97 SameFileError,
98 sanitize_filename,
1bb5c511 99 sanitize_path,
dcf77cf1 100 sanitize_url,
67dda517 101 sanitized_Request,
e5660ee6 102 std_headers,
1211bb6d 103 str_or_none,
e29663c6 104 strftime_or_none,
ce02ed60 105 subtitles_filename,
51d9739f 106 ThrottledDownload,
732044af 107 to_high_limit_path,
324ad820 108 traverse_obj,
6033d980 109 try_get,
ce02ed60 110 UnavailableVideoError,
29eb5174 111 url_basename,
7d1eb38a 112 variadic,
58b1f00d 113 version_tuple,
ce02ed60
PH
114 write_json_file,
115 write_string,
6a3f4c3f 116 YoutubeDLCookieProcessor,
dca08720 117 YoutubeDLHandler,
fca6dba8 118 YoutubeDLRedirectHandler,
ce02ed60 119)
a0e07d31 120from .cache import Cache
52a8a1e1 121from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126)
4c54b89e 127from .extractor.openload import PhantomJSwrapper
52a8a1e1 128from .downloader import (
dbf5416a 129 FFmpegFD,
52a8a1e1 130 get_suitable_downloader,
131 shorten_protocol_name
132)
4c83c967 133from .downloader.rtmp import rtmpdump_version
4f026faf 134from .postprocessor import (
e36d50c5 135 get_postprocessor,
136 FFmpegFixupDurationPP,
f17f8651 137 FFmpegFixupM3u8PP,
62cd676c 138 FFmpegFixupM4aPP,
6271f1ca 139 FFmpegFixupStretchedPP,
e36d50c5 140 FFmpegFixupTimestampPP,
4f026faf
PH
141 FFmpegMergerPP,
142 FFmpegPostProcessor,
0202b52a 143 MoveFilesAfterDownloadPP,
4f026faf 144)
dca08720 145from .version import __version__
8222d8de 146
e9c0cdd3
YCH
147if compat_os_name == 'nt':
148 import ctypes
149
2459b6e1 150
8222d8de
JMF
151class YoutubeDL(object):
152 """YoutubeDL class.
153
154 YoutubeDL objects are the ones responsible for downloading the
155 actual video file and writing it to disk if the user has requested
156 it, among some other tasks. In most cases there should be one per
157 program. As, given a video URL, the downloader doesn't know how to
158 extract all the needed information, task that InfoExtractors do, it
159 has to pass the URL to one of them.
160
161 For this, YoutubeDL objects have a method that allows
162 InfoExtractors to be registered in a given order. When it is passed
163 a URL, the YoutubeDL object handles it to the first InfoExtractor it
164 finds that reports being able to handle it. The InfoExtractor extracts
165 all the information about the video or videos the URL refers to, and
166 YoutubeDL process the extracted information, possibly using a File
167 Downloader to download the video.
168
169 YoutubeDL objects accept a lot of parameters. In order not to saturate
170 the object constructor with arguments, it receives a dictionary of
171 options instead. These options are available through the params
172 attribute for the InfoExtractors to use. The YoutubeDL also
173 registers itself as the downloader in charge for the InfoExtractors
174 that are added to it, so this is a "mutual registration".
175
176 Available options:
177
178 username: Username for authentication purposes.
179 password: Password for authentication purposes.
180940e0 180 videopassword: Password for accessing a video.
1da50aa3
S
181 ap_mso: Adobe Pass multiple-system operator identifier.
182 ap_username: Multiple-system operator account username.
183 ap_password: Multiple-system operator account password.
8222d8de
JMF
184 usenetrc: Use netrc for authentication instead.
185 verbose: Print additional info to stdout.
186 quiet: Do not print messages to stdout.
ad8915b7 187 no_warnings: Do not print out anything for warnings.
53c18592 188 forceprint: A list of templates to force print
189 forceurl: Force printing final URL. (Deprecated)
190 forcetitle: Force printing title. (Deprecated)
191 forceid: Force printing ID. (Deprecated)
192 forcethumbnail: Force printing thumbnail URL. (Deprecated)
193 forcedescription: Force printing description. (Deprecated)
194 forcefilename: Force printing final filename. (Deprecated)
195 forceduration: Force printing duration. (Deprecated)
8694c600 196 forcejson: Force printing info_dict as JSON.
63e0be34
PH
197 dump_single_json: Force printing the info_dict of the whole playlist
198 (or video) as a single JSON line.
c25228e5 199 force_write_download_archive: Force writing download archive regardless
200 of 'skip_download' or 'simulate'.
b7b04c78 201 simulate: Do not download the video files. If unset (or None),
202 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 203 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 205 ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
c25228e5 208 format_sort: How to sort the video formats. see "Sorting Formats"
209 for more details.
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
211 for more details.
212 allow_multiple_video_streams: Allow multiple video streams to be merged
213 into a single file
214 allow_multiple_audio_streams: Allow multiple audio streams to be merged
215 into a single file
0ba692ac 216 check_formats Whether to test if the formats are downloadable.
217 Can be True (check all), False (check none)
218 or None (check only if requested by extractor)
4524baf0 219 paths: Dictionary of output paths. The allowed keys are 'home'
220 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 221 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 222 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
223 A string is also accepted for backward compatibility
a820dc72
RA
224 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
225 restrictfilenames: Do not allow "&" and spaces in file names
226 trim_file_name: Limit length of filename (extension excluded)
4524baf0 227 windowsfilenames: Force the filenames to be windows compatible
a820dc72 228 ignoreerrors: Do not stop on download errors
7a5c1cfe 229 (Default True when running yt-dlp,
a820dc72 230 but False when directly accessing YoutubeDL class)
26e2805c 231 skip_playlist_after_errors: Number of allowed failures until the rest of
232 the playlist is skipped
d22dec74 233 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 234 overwrites: Overwrite all video and metadata files if True,
235 overwrite only non-video files if None
236 and don't overwrite any file if False
8222d8de
JMF
237 playliststart: Playlist item to start at.
238 playlistend: Playlist item to end at.
c14e88f0 239 playlist_items: Specific indices of playlist to download.
ff815fe6 240 playlistreverse: Download playlist items in reverse order.
75822ca7 241 playlistrandom: Download playlist items in random order.
8222d8de
JMF
242 matchtitle: Download only matching titles.
243 rejecttitle: Reject downloads for matching titles.
8bf9319e 244 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
245 logtostderr: Log messages to stderr instead of stdout.
246 writedescription: Write the video description to a .description file
247 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 248 clean_infojson: Remove private fields from the infojson
06167fbb 249 writecomments: Extract video comments. This will not be written to disk
250 unless writeinfojson is also given
1fb07d10 251 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 252 writethumbnail: Write the thumbnail image to a file
c25228e5 253 allow_playlist_files: Whether to write playlists' description, infojson etc
254 also to disk when using the 'write*' options
ec82d85a 255 write_all_thumbnails: Write all thumbnail formats to files
732044af 256 writelink: Write an internet shortcut file, depending on the
257 current platform (.url/.webloc/.desktop)
258 writeurllink: Write a Windows internet shortcut file (.url)
259 writewebloclink: Write a macOS internet shortcut file (.webloc)
260 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 261 writesubtitles: Write the video subtitles to a file
741dd8ea 262 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 263 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 264 Downloads all the subtitles of the video
0b7f3118 265 (requires writesubtitles or writeautomaticsub)
8222d8de 266 listsubtitles: Lists all available subtitles for the video
a504ced0 267 subtitlesformat: The format code for subtitles
c32b0aab 268 subtitleslangs: List of languages of the subtitles to download (can be regex).
269 The list may contain "all" to refer to all the available
270 subtitles. The language can be prefixed with a "-" to
271 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
272 keepvideo: Keep the video file after post-processing
273 daterange: A DateRange object, download only if the upload_date is in the range.
274 skip_download: Skip the actual download of the video file
c35f9e72 275 cachedir: Location of the cache files in the filesystem.
a0e07d31 276 False to disable filesystem cache.
47192f92 277 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
278 age_limit: An integer representing the user's age in years.
279 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
280 min_views: An integer representing the minimum view count the video
281 must have in order to not be skipped.
282 Videos without view count information are always
283 downloaded. None for no limit.
284 max_views: An integer representing the maximum view count.
285 Videos that are more popular than that are not
286 downloaded.
287 Videos without view count information are always
288 downloaded. None for no limit.
289 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
290 Videos already present in the file are not downloaded
291 again.
8a51f564 292 break_on_existing: Stop the download process after attempting to download a
293 file that is in the archive.
294 break_on_reject: Stop the download process when encountering a video that
295 has been filtered out.
296 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
297 cookiesfrombrowser: A tuple containing the name of the browser and the profile
298 name/path from where cookies are loaded.
299 Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 300 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
301 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
302 At the moment, this is only supported by YouTube.
a1ee09e8 303 proxy: URL of the proxy server to use
38cce791 304 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 305 on geo-restricted sites.
e344693b 306 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
307 bidi_workaround: Work around buggy terminals without bidirectional text
308 support, using fribidi
a0ddb8a2 309 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 310 include_ads: Download ads as well
04b4d394
PH
311 default_search: Prepend this string if an input url is not valid.
312 'auto' for elaborate guessing
62fec3b2 313 encoding: Use this encoding instead of the system-specified.
e8ee972c 314 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
315 Pass in 'in_playlist' to only show this behavior for
316 playlist items.
4f026faf 317 postprocessors: A list of dictionaries, each with an entry
71b640cc 318 * key: The name of the postprocessor. See
7a5c1cfe 319 yt_dlp/postprocessor/__init__.py for a list.
56d868db 320 * when: When to run the postprocessor. Can be one of
321 pre_process|before_dl|post_process|after_move.
322 Assumed to be 'post_process' if not given
ab8e5e51
AM
323 post_hooks: A list of functions that get called as the final step
324 for each video file, after all postprocessors have been
325 called. The filename will be passed as the only argument.
71b640cc
PH
326 progress_hooks: A list of functions that get called on download
327 progress, with a dictionary with the entries
5cda4eda 328 * status: One of "downloading", "error", or "finished".
ee69b99a 329 Check this first and ignore unknown values.
3ba7740d 330 * info_dict: The extracted info_dict
71b640cc 331
5cda4eda 332 If status is one of "downloading", or "finished", the
ee69b99a
PH
333 following properties may also be present:
334 * filename: The final filename (always present)
5cda4eda 335 * tmpfilename: The filename we're currently writing to
71b640cc
PH
336 * downloaded_bytes: Bytes on disk
337 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
338 * total_bytes_estimate: Guess of the eventual file size,
339 None if unavailable.
340 * elapsed: The number of seconds since download started.
71b640cc
PH
341 * eta: The estimated time in seconds, None if unknown
342 * speed: The download speed in bytes/second, None if
343 unknown
5cda4eda
PH
344 * fragment_index: The counter of the currently
345 downloaded video fragment.
346 * fragment_count: The number of fragments (= individual
347 files that will be merged)
71b640cc
PH
348
349 Progress hooks are guaranteed to be called at least once
350 (with status "finished") if the download is successful.
45598f15 351 merge_output_format: Extension to use when merging formats.
6b591b29 352 final_ext: Expected final extension; used to detect when the file was
353 already downloaded and converted. "merge_output_format" is
354 replaced by this extension when given
6271f1ca
PH
355 fixup: Automatically correct known faults of the file.
356 One of:
357 - "never": do nothing
358 - "warn": only emit a warning
359 - "detect_or_warn": check whether we can do anything
62cd676c 360 about it, warn otherwise (default)
504f20dd 361 source_address: Client-side IP address to bind to.
6ec6cb4e 362 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 363 yt-dlp servers for debugging. (BROKEN)
1cf376f5 364 sleep_interval_requests: Number of seconds to sleep between requests
365 during extraction
7aa589a5
S
366 sleep_interval: Number of seconds to sleep before each download when
367 used alone or a lower bound of a range for randomized
368 sleep before each download (minimum possible number
369 of seconds to sleep) when used along with
370 max_sleep_interval.
371 max_sleep_interval:Upper bound of a range for randomized sleep before each
372 download (maximum possible number of seconds to sleep).
373 Must only be used along with sleep_interval.
374 Actual sleep time will be a random float from range
375 [sleep_interval; max_sleep_interval].
1cf376f5 376 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
377 listformats: Print an overview of available video formats and exit.
378 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
379 match_filter: A function that gets called with the info_dict of
380 every video.
381 If it returns a message, the video is ignored.
382 If it returns None, the video is downloaded.
383 match_filter_func in utils.py is one example for this.
7e5db8c9 384 no_color: Do not emit color codes in output.
0a840f58 385 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 386 HTTP header
0a840f58 387 geo_bypass_country:
773f291d
S
388 Two-letter ISO 3166-2 country code that will be used for
389 explicit geographic restriction bypassing via faking
504f20dd 390 X-Forwarded-For HTTP header
5f95927a
S
391 geo_bypass_ip_block:
392 IP range in CIDR notation that will be used similarly to
504f20dd 393 geo_bypass_country
71b640cc 394
85729c51 395 The following options determine which downloader is picked:
52a8a1e1 396 external_downloader: A dictionary of protocol keys and the executable of the
397 external downloader to use for it. The allowed protocols
398 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
399 Set the value to 'native' to use the native downloader
400 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
401 or {'m3u8': 'ffmpeg'} instead.
402 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
403 if True, otherwise use ffmpeg/avconv if False, otherwise
404 use downloader suggested by extractor if None.
53ed7066 405 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 406 The following options do not work when used through the API:
407 filename, abort-on-error, multistreams, no-live-chat,
b51d2ae3 408 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 409 Refer __init__.py for their implementation
fe7e0c98 410
8222d8de 411 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 412 the downloader (see yt_dlp/downloader/common.py):
51d9739f 413 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
414 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
415 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67
JMF
416
417 The following options are used by the post processors:
d4a24f40 418 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 419 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
420 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
421 to the binary or its containing directory.
43820c03 422 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
423 and a list of additional command-line arguments for the
424 postprocessor/executable. The dict can also have "PP+EXE" keys
425 which are used when the given exe is used by the given PP.
426 Use 'default' as the name for arguments to be passed to all PP
e409895f 427
428 The following options are used by the extractors:
62bff2c1 429 extractor_retries: Number of times to retry for known errors
430 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 431 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 432 discontinuities such as ad breaks (default: False)
5d3a0e79 433 extractor_args: A dictionary of arguments to be passed to the extractors.
434 See "EXTRACTOR ARGUMENTS" for details.
435 Eg: {'youtube': {'skip': ['dash', 'hls']}}
436 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
437 If True (default), DASH manifests and related
62bff2c1 438 data will be downloaded and processed by extractor.
439 You can reduce network I/O by disabling it if you don't
440 care about DASH. (only for youtube)
5d3a0e79 441 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
442 If True (default), HLS manifests and related
62bff2c1 443 data will be downloaded and processed by extractor.
444 You can reduce network I/O by disabling it if you don't
445 care about HLS. (only for youtube)
8222d8de
JMF
446 """
447
c9969434
S
448 _NUMERIC_FIELDS = set((
449 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
450 'timestamp', 'upload_year', 'upload_month', 'upload_day',
451 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
452 'average_rating', 'comment_count', 'age_limit',
453 'start_time', 'end_time',
454 'chapter_number', 'season_number', 'episode_number',
455 'track_number', 'disc_number', 'release_year',
456 'playlist_index',
457 ))
458
8222d8de
JMF
459 params = None
460 _ies = []
56d868db 461 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 462 _printed_messages = set()
1cf376f5 463 _first_webpage_request = True
8222d8de
JMF
464 _download_retcode = None
465 _num_downloads = None
30a074c2 466 _playlist_level = 0
467 _playlist_urls = set()
8222d8de
JMF
468 _screen_file = None
469
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:     Dictionary of options (see the class docstring). Its
                entries are merged into self.params on top of the
                built-in defaults.
    auto_init:  If true, print the debug header and register the
                default info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Choose by truthiness instead of indexing a list with the raw value,
    # so that an explicit `logtostderr: None` does not raise TypeError
    self._screen_file = sys.stderr if params.get('logtostderr') else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # Treat an explicit None the same as the key being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's dictionary is not modified
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
603
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a video ID starting with a dash.

    An argument consisting of '-' followed by exactly 10 ID characters is
    considered suspicious. If any are found, a warning is reported that
    shows the command line with those arguments moved after a '--'.
    """
    # short YouTube ID starting with dash?
    dashed_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects, others = [], []
    for arg in argv:
        (suspects if dashed_id.match(arg) else others).append(arg)
    if not suspects:
        return
    suggested_argv = ['yt-dlp'] + others + ['--'] + suspects
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggested_argv))
619
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append ie to the end of the list of info extractors.

    ie may be an extractor class or an instance. Only an instance is also
    indexed by its ie_key() and told that this object is its downloader.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # A bare class has nothing to register yet
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 626
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If none has been
    registered yet, a new one is created, added to the extractor list
    and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
638
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every extractor class yielded by gen_extractor_classes to the
    end of the list, in the order they are yielded
    """
    for ie_class in gen_extractor_classes():
        self.add_info_extractor(ie_class)
645
def add_post_processor(self, pp, when='post_process'):
    """Append the PostProcessor pp to the chain that runs at stage `when`.

    `when` must be one of the keys of self._pps. The postprocessor is
    then told that this object is its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
650
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
654
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks
    (currently only for the file downloader)"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 658
def _bidi_workaround(self, message):
    """Pass message through the external bidi process, if one was set up.

    When no output channel exists the message is returned untouched.
    Otherwise the message is written to the helper process and as many
    lines as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = 1 + message.count('\n')
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode('utf-8'))
    # Strip the newline that was appended before sending
    return ''.join(converted)[:-len('\n')]
1c088fa8 671
def _write_string(self, message, out=None, only_once=False):
    """Write message to out using the encoding given in the params.

    With only_once set, a message that was already written through this
    method with only_once is skipped.
    """
    if only_once:
        already_printed = self._printed_messages
        if message in already_printed:
            return
        already_printed.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 678
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    If a logger is configured the message goes to its debug() instead.
    A quiet message is only written in verbose mode, and then to the
    error stream. A newline is appended unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    self._write_string(text, self._err_file if quiet else self._screen_file)
8222d8de 687
def to_stderr(self, message, only_once=False):
    """Print message to stderr

    If a logger is configured the message goes to its error() instead.
    only_once is passed on to _write_string.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
        return
    logger.error(message)
8222d8de 695
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    On Windows this calls SetConsoleTitleW if a console window exists.
    Elsewhere an escape sequence is written to the screen file, but only
    if TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 706
bdde425c
PH
def save_console_title(self):
    """Push the current terminal title onto the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, and nothing when simulating.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
715
def restore_console_title(self):
    """Pop the previously saved terminal title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, and nothing when simulating.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
724
def __enter__(self):
    """Context-manager entry: save the console title and hand back this object"""
    self.save_console_title()
    return self
728
def __exit__(self, *args):
    """Context-manager exit: restore the console title and persist cookies.

    The cookie jar is saved only when a 'cookiefile' parameter is configured.
    """
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is not None:
        self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 734
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed first. In verbose mode a traceback is
    printed as well: `tb` when given, otherwise one built from the exception
    being handled, or from the current stack when no exception is active.

    Unless the 'ignoreerrors' parameter is set, DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    def carried_exc_info():
        # exc_info tuple stored on the exception being handled, if it has one
        active = sys.exc_info()
        if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
            return active[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                inner = carried_exc_info()
                tb = '' if inner is None else ''.join(traceback.format_exception(*inner))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = carried_exc_info()
    if exc_info is None:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
765
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    be_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=be_quiet)
770
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    A configured logger receives the bare message instead; the 'no_warnings'
    parameter suppresses the output entirely.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (header, message), only_once)
8222d8de
JMF
787
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
799
def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr (only in verbose mode)'''
    if not self.params.get('verbose', False):
        return
    message = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    self.to_stderr(message, only_once)
0760b0a7 809
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    detailed = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded for output; fall back to a generic notice
        self.to_screen('[download] The file has already been downloaded')
8222d8de 816
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    detailed = 'Deleting existing file %s' % file_name
    try:
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded for output; fall back to a generic notice
        self.to_screen('Deleting existing file')
0c3d0f51 823
def parse_outtmpl(self):
    """Return the 'outtmpl' parameter as a dict with defaults filled in.

    A non-dict value is wrapped as {'default': value}. Every key of
    DEFAULT_OUTTMPL whose entry is missing or empty is set to the default
    (a dict passed in the parameters is updated in place). A warning is
    issued for each template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for tmpl_name, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_name):
            templates[tmpl_name] = fallback
    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
837
def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    The joined path is passed through sanitize_path before being returned.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = ''
    if dir_type:
        type_dir = expand_path(paths.get(dir_type, '').strip())
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
852
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path over `outtmpl` while keeping '%%' and '$$' intact"""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', '%' + sep + '%')
    protected = protected.replace('$$', '$' + sep + '$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(sep, '')
867
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def add_percent_unless_keyed(mobj):
        # Matches without a key get an extra '%' prepended; keyed ones stay as they are
        lead = '' if mobj.group('has_key') else '%'
        return lead + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, add_percent_unless_keyed, outtmpl)
875
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def as_string_conversion(mobj):
        # Swap the custom conversion character (l, j or q) for a plain 's'
        return mobj.group(0)[:-1] + 's'

    outtmpl = re.sub(
        STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
        as_string_conversion,
        cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; only the ValueError (if any) is of interest
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None
888
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution : ydl.escape_outtmpl(outtmpl) % info_dict

    @param outtmpl    output template string
    @param info_dict  info dict; 'epoch' is set on it if missing, everything
                      else is done on a shallow copy
    @param sanitize   optional callable(field_name, value) applied to values
                      that are formatted as strings
    @return           (rewritten template, dict of values for the rewritten keys)
    """
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
    for key in ('__original_infodict', '__postprocessors'):
        info_dict.pop(key, None)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The decimal point is escaped so that it only matches a literal '.'
    MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # {math_field} contains a top-level alternation, so it has to be grouped;
    # otherwise the '|' would split the whole "operator operand" item
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}(?:{math_field}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # Tokens alternate: operator, operand, operator, operand, ...
            math_func = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_func else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_func is None:
                    math_func = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a number or the name of another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(get_key(item))
                try:
                    value = math_func(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_func = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return f'%{outer_mobj.group(0)}'

        prefix = outer_mobj.group('prefix')
        key = outer_mobj.group('key')
        original_fmt = fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default, mobj = None, na, {'fields': ''}
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':
            value, fmt = ', '.join(variadic(value)), str_fmt
        elif fmt[-1] == 'j':
            value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
        elif fmt[-1] == 'q':
            value, fmt = compat_shlex_quote(str(value)), str_fmt
        elif fmt[-1] == 'c':
            value = str(value)
            if value:
                value = value[0]
            else:
                # '%c' needs exactly one character; format an empty value as a string instead
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitize(mobj['fields'].split('.')[-1], value)

        # The rewritten key embeds the original conversion so that the same
        # field used with different conversions gets separate entries
        key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
        TMPL_DICT[key] = value
        return f'{prefix}%({key}){fmt}'

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1026
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type `tmpl_type` against `info_dict`.

    Returns the resulting file name, or None after reporting an error when
    the template substitution raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
        outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
        filename = outtmpl % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing dotted parts (sub-extension and
            # extension). Only the remaining stem is trimmed, so dots inside
            # the title are kept and a name without any dot is not duplicated.
            stem, *extensions = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [stem[:trim_file_name], *extensions]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1056
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    @param info_dict  info dict the template is evaluated against
    @param dir_type   template/path type; '' selects the 'default' template
    @param warn       whether to warn when the 'paths' parameter is set but
                      cannot take effect
    @return           '-' or a falsy value exactly as produced by the template
                      step, otherwise the name combined with the output paths
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename is None when the template could not be evaluated;
        # os.path.isabs() would raise TypeError on it
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
0202b52a 1074
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded, else the reason for skipping it

    The reason is printed unless `silent`. If the matching 'break_on_existing'
    or 'break_on_reject' parameter is set, the corresponding exception is raised.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            wanted = self.params.get('matchtitle', False)
            if wanted and not re.search(wanted, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted + '"'
            unwanted = self.params.get('rejecttitle', False)
            if unwanted and re.search(unwanted, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (
                    date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            lower = self.params.get('min_views')
            if lower is not None and views < lower:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, lower)
            upper = self.params.get('max_views')
            if upper is not None and views > upper:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, upper)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # The user-supplied filter is only consulted for complete info dicts
        if incomplete:
            return None
        user_filter = self.params.get('match_filter')
        if user_filter is None:
            return None
        verdict = user_filter(info_dict)
        return verdict if verdict is not None else None

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1128
b6c45014
JMF
1129 @staticmethod
1130 def add_extra_info(info_dict, extra_info):
1131 '''Set the keys from extra_info in info dict if they are missing'''
1132 for key, value in extra_info.items():
1133 info_dict.setdefault(key, value)
1134
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for the given URL with the first suitable extractor.

    Returns whatever the extraction step returns for that extractor, or None
    when the URL is already recorded in the download archive or no suitable
    extractor exists (the latter is reported as an error).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
                  (None means an empty dictionary)
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    # A fresh dict per call: a `{}` default would be shared between calls
    # and can be modified further down the processing chain
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that archived videos can be
        # skipped without running the extractor
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        # Reached only when the loop finished without `break` or `return`
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1183
def __handle_extraction_exceptions(func, handle_all_errors=True):
    """Wrap `func(self, ...)` so that extraction errors are reported instead of raised.

    Takes no `self`: it is applied to functions directly (as a decorator or
    by an explicit call), and the wrapper it returns receives `self`.

    With handle_all_errors=False, exceptions other than the specifically
    handled ones are always re-raised, regardless of 'ignoreerrors'.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            # Handled before ExtractorError so that the country/VPN hint is added
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except ThrottledDownload:
            # Clear the current line, warn, then retry by calling the wrapper again
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            # These are always passed on to the caller
            raise
        except Exception as e:
            # Anything else is only reported when 'ignoreerrors' is set
            if handle_all_errors and self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
1209
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url`, add the default extra info and optionally process the result"""
    extracted = ie.extract(url)
    if extracted is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(extracted, list):
        # Backwards compatibility: old IE result format
        extracted = {'_type': 'compat_list', 'entries': extracted}
    source_url = extra_info.get('original_url')
    if source_url:
        extracted.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(extracted, ie, url)
    if not process:
        return extracted
    return self.process_ie_result(extracted, download, extra_info)
fe7e0c98 1228
def add_default_extra_info(self, ie_result, ie, url):
    """Add URL- and extractor-derived fields to `ie_result` where they are missing.

    Nothing is added for an argument (`url` or `ie`) that is None.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1241
8222d8de
JMF
1242 def process_ie_result(self, ie_result, download=True, extra_info={}):
1243 """
1244 Take the result of the ie(may be modified) and resolve all unresolved
1245 references (URLs, playlist items).
1246
1247 It will also download the videos if 'download'.
1248 Returns the resolved ie_result.
1249 """
e8ee972c
PH
1250 result_type = ie_result.get('_type', 'video')
1251
057a5206 1252 if result_type in ('url', 'url_transparent'):
134c6ea8 1253 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1254 if ie_result.get('original_url'):
1255 extra_info.setdefault('original_url', ie_result['original_url'])
1256
057a5206 1257 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1258 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1259 or extract_flat is True):
ecb54191 1260 info_copy = ie_result.copy()
1261 self.add_extra_info(info_copy, extra_info)
6033d980 1262 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1263 self.add_default_extra_info(info_copy, ie, ie_result['url'])
ecb54191 1264 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
e8ee972c
PH
1265 return ie_result
1266
8222d8de 1267 if result_type == 'video':
b6c45014 1268 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1269 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1270 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1271 if additional_urls:
1272 # TODO: Improve MetadataFromFieldPP to allow setting a list
1273 if isinstance(additional_urls, compat_str):
1274 additional_urls = [additional_urls]
1275 self.to_screen(
1276 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1277 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1278 ie_result['additional_entries'] = [
1279 self.extract_info(
1280 url, download, extra_info,
1281 force_generic_extractor=self.params.get('force_generic_extractor'))
1282 for url in additional_urls
1283 ]
1284 return ie_result
8222d8de
JMF
1285 elif result_type == 'url':
1286 # We have to add extra_info to the results because it may be
1287 # contained in a playlist
07cce701 1288 return self.extract_info(
1289 ie_result['url'], download,
1290 ie_key=ie_result.get('ie_key'),
1291 extra_info=extra_info)
7fc3fa05
PH
1292 elif result_type == 'url_transparent':
1293 # Use the information from the embedding page
1294 info = self.extract_info(
1295 ie_result['url'], ie_key=ie_result.get('ie_key'),
1296 extra_info=extra_info, download=False, process=False)
1297
1640eb09
S
1298 # extract_info may return None when ignoreerrors is enabled and
1299 # extraction failed with an error, don't crash and return early
1300 # in this case
1301 if not info:
1302 return info
1303
412c617d
PH
1304 force_properties = dict(
1305 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1306 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1307 if f in force_properties:
1308 del force_properties[f]
1309 new_result = info.copy()
1310 new_result.update(force_properties)
7fc3fa05 1311
0563f7ac
S
1312 # Extracted info may not be a video result (i.e.
1313 # info.get('_type', 'video') != video) but rather an url or
1314 # url_transparent. In such cases outer metadata (from ie_result)
1315 # should be propagated to inner one (info). For this to happen
1316 # _type of info should be overridden with url_transparent. This
067aa17e 1317 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1318 if new_result.get('_type') == 'url':
1319 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1320
1321 return self.process_ie_result(
1322 new_result, download=download, extra_info=extra_info)
40fcba5e 1323 elif result_type in ('playlist', 'multi_video'):
30a074c2 1324 # Protect from infinite recursion due to recursively nested playlists
1325 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1326 webpage_url = ie_result['webpage_url']
1327 if webpage_url in self._playlist_urls:
7e85e872 1328 self.to_screen(
30a074c2 1329 '[download] Skipping already downloaded playlist: %s'
1330 % ie_result.get('title') or ie_result.get('id'))
1331 return
7e85e872 1332
30a074c2 1333 self._playlist_level += 1
1334 self._playlist_urls.add(webpage_url)
bc516a3f 1335 self._sanitize_thumbnails(ie_result)
30a074c2 1336 try:
1337 return self.__process_playlist(ie_result, download)
1338 finally:
1339 self._playlist_level -= 1
1340 if not self._playlist_level:
1341 self._playlist_urls.clear()
8222d8de 1342 elif result_type == 'compat_list':
c9bf4114
PH
1343 self.report_warning(
1344 'Extractor %s returned a compat_list result. '
1345 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1346
8222d8de 1347 def _fixup(r):
9e1a5b84
JW
1348 self.add_extra_info(
1349 r,
9103bbc5
JMF
1350 {
1351 'extractor': ie_result['extractor'],
1352 'webpage_url': ie_result['webpage_url'],
29eb5174 1353 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 1354 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
1355 }
1356 )
8222d8de
JMF
1357 return r
1358 ie_result['entries'] = [
b6c45014 1359 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1360 for r in ie_result['entries']
1361 ]
1362 return ie_result
1363 else:
1364 raise Exception('Invalid result type: %s' % result_type)
1365
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path`, passing report_error as the error callback"""
    on_error = self.report_error
    return make_dir(path, on_error)
1368
30a074c2 1369 def __process_playlist(self, ie_result, download):
1370 # We process each entry in the playlist
1371 playlist = ie_result.get('title') or ie_result.get('id')
1372 self.to_screen('[download] Downloading playlist: %s' % playlist)
1373
498f5606 1374 if 'entries' not in ie_result:
1375 raise EntryNotInPlaylist()
1376 incomplete_entries = bool(ie_result.get('requested_entries'))
1377 if incomplete_entries:
1378 def fill_missing_entries(entries, indexes):
1379 ret = [None] * max(*indexes)
1380 for i, entry in zip(indexes, entries):
1381 ret[i - 1] = entry
1382 return ret
1383 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1384
30a074c2 1385 playlist_results = []
1386
56a8fb4f 1387 playliststart = self.params.get('playliststart', 1)
30a074c2 1388 playlistend = self.params.get('playlistend')
1389 # For backwards compatibility, interpret -1 as whole list
1390 if playlistend == -1:
1391 playlistend = None
1392
1393 playlistitems_str = self.params.get('playlist_items')
1394 playlistitems = None
1395 if playlistitems_str is not None:
1396 def iter_playlistitems(format):
1397 for string_segment in format.split(','):
1398 if '-' in string_segment:
1399 start, end = string_segment.split('-')
1400 for item in range(int(start), int(end) + 1):
1401 yield int(item)
1402 else:
1403 yield int(string_segment)
1404 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1405
1406 ie_entries = ie_result['entries']
56a8fb4f 1407 msg = (
1408 'Downloading %d videos' if not isinstance(ie_entries, list)
1409 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1410 if not isinstance(ie_entries, (list, PagedList)):
1411 ie_entries = LazyList(ie_entries)
1412
50fed816 1413 def get_entry(i):
1414 return YoutubeDL.__handle_extraction_exceptions(
cc9d1493 1415 lambda self, i: ie_entries[i - 1],
1416 False
50fed816 1417 )(self, i)
1418
56a8fb4f 1419 entries = []
1420 for i in playlistitems or itertools.count(playliststart):
1421 if playlistitems is None and playlistend is not None and playlistend < i:
1422 break
1423 entry = None
1424 try:
50fed816 1425 entry = get_entry(i)
56a8fb4f 1426 if entry is None:
498f5606 1427 raise EntryNotInPlaylist()
56a8fb4f 1428 except (IndexError, EntryNotInPlaylist):
1429 if incomplete_entries:
1430 raise EntryNotInPlaylist()
1431 elif not playlistitems:
1432 break
1433 entries.append(entry)
120fe513 1434 try:
1435 if entry is not None:
1436 self._match_entry(entry, incomplete=True, silent=True)
1437 except (ExistingVideoReached, RejectedVideoReached):
1438 break
56a8fb4f 1439 ie_result['entries'] = entries
30a074c2 1440
56a8fb4f 1441 # Save playlist_index before re-ordering
1442 entries = [
1443 ((playlistitems[i - 1] if playlistitems else i), entry)
1444 for i, entry in enumerate(entries, 1)
1445 if entry is not None]
1446 n_entries = len(entries)
498f5606 1447
498f5606 1448 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1449 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1450 ie_result['requested_entries'] = playlistitems
1451
1452 if self.params.get('allow_playlist_files', True):
1453 ie_copy = {
1454 'playlist': playlist,
1455 'playlist_id': ie_result.get('id'),
1456 'playlist_title': ie_result.get('title'),
1457 'playlist_uploader': ie_result.get('uploader'),
1458 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1459 'playlist_index': 0,
498f5606 1460 }
1461 ie_copy.update(dict(ie_result))
1462
1463 if self.params.get('writeinfojson', False):
1464 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1465 if not self._ensure_dir_exists(encodeFilename(infofn)):
1466 return
1467 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1468 self.to_screen('[info] Playlist metadata is already present')
1469 else:
1470 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1471 try:
8012d892 1472 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
498f5606 1473 except (OSError, IOError):
1474 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1475
681de68e 1476 # TODO: This should be passed to ThumbnailsConvertor if necessary
1477 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1478
498f5606 1479 if self.params.get('writedescription', False):
1480 descfn = self.prepare_filename(ie_copy, 'pl_description')
1481 if not self._ensure_dir_exists(encodeFilename(descfn)):
1482 return
1483 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1484 self.to_screen('[info] Playlist description is already present')
1485 elif ie_result.get('description') is None:
1486 self.report_warning('There\'s no playlist description to write.')
1487 else:
1488 try:
1489 self.to_screen('[info] Writing playlist description to: ' + descfn)
1490 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1491 descfile.write(ie_result['description'])
1492 except (OSError, IOError):
1493 self.report_error('Cannot write playlist description file ' + descfn)
1494 return
30a074c2 1495
1496 if self.params.get('playlistreverse', False):
1497 entries = entries[::-1]
30a074c2 1498 if self.params.get('playlistrandom', False):
1499 random.shuffle(entries)
1500
1501 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1502
56a8fb4f 1503 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1504 failures = 0
1505 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1506 for i, entry_tuple in enumerate(entries, 1):
1507 playlist_index, entry = entry_tuple
53ed7066 1508 if 'playlist_index' in self.params.get('compat_options', []):
1509 playlist_index = playlistitems[i - 1] if playlistitems else i
30a074c2 1510 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1511 # This __x_forwarded_for_ip thing is a bit ugly but requires
1512 # minimal changes
1513 if x_forwarded_for:
1514 entry['__x_forwarded_for_ip'] = x_forwarded_for
1515 extra = {
1516 'n_entries': n_entries,
f59ae581 1517 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1518 'playlist_index': playlist_index,
1519 'playlist_autonumber': i,
30a074c2 1520 'playlist': playlist,
1521 'playlist_id': ie_result.get('id'),
1522 'playlist_title': ie_result.get('title'),
1523 'playlist_uploader': ie_result.get('uploader'),
1524 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1525 'extractor': ie_result['extractor'],
1526 'webpage_url': ie_result['webpage_url'],
1527 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1528 'extractor_key': ie_result['extractor_key'],
1529 }
1530
1531 if self._match_entry(entry, incomplete=True) is not None:
1532 continue
1533
1534 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1535 if not entry_result:
1536 failures += 1
1537 if failures >= max_failures:
1538 self.report_error(
1539 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1540 break
30a074c2 1541 # TODO: skip failed (empty) entries?
1542 playlist_results.append(entry_result)
1543 ie_result['entries'] = playlist_results
1544 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1545 return ie_result
1546
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry through process_ie_result.

    The call is wrapped by the extraction-exception decorator applied above;
    whatever process_ie_result returns is passed back unchanged.
    """
    processed = self.process_ie_result(
        entry,
        download=download,
        extra_info=extra_info,
    )
    return processed
1551
67134eab
JMF
1552 def _build_format_filter(self, filter_spec):
1553 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1554
1555 OPERATORS = {
1556 '<': operator.lt,
1557 '<=': operator.le,
1558 '>': operator.gt,
1559 '>=': operator.ge,
1560 '=': operator.eq,
1561 '!=': operator.ne,
1562 }
67134eab 1563 operator_rex = re.compile(r'''(?x)\s*
187986a8 1564 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1565 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1566 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1567 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1568 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1569 if m:
1570 try:
1571 comparison_value = int(m.group('value'))
1572 except ValueError:
1573 comparison_value = parse_filesize(m.group('value'))
1574 if comparison_value is None:
1575 comparison_value = parse_filesize(m.group('value') + 'B')
1576 if comparison_value is None:
1577 raise ValueError(
1578 'Invalid value %r in format specification %r' % (
67134eab 1579 m.group('value'), filter_spec))
9ddb6925
S
1580 op = OPERATORS[m.group('op')]
1581
083c9df9 1582 if not m:
9ddb6925
S
1583 STR_OPERATORS = {
1584 '=': operator.eq,
10d33b34
YCH
1585 '^=': lambda attr, value: attr.startswith(value),
1586 '$=': lambda attr, value: attr.endswith(value),
1587 '*=': lambda attr, value: value in attr,
9ddb6925 1588 }
187986a8 1589 str_operator_rex = re.compile(r'''(?x)\s*
1590 (?P<key>[a-zA-Z0-9._-]+)\s*
1591 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1592 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1593 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1594 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1595 if m:
1596 comparison_value = m.group('value')
2cc779f4
S
1597 str_op = STR_OPERATORS[m.group('op')]
1598 if m.group('negation'):
e118a879 1599 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1600 else:
1601 op = str_op
083c9df9 1602
9ddb6925 1603 if not m:
187986a8 1604 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1605
1606 def _filter(f):
1607 actual_value = f.get(m.group('key'))
1608 if actual_value is None:
1609 return m.group('none_inclusive')
1610 return op(actual_value, comparison_value)
67134eab
JMF
1611 return _filter
1612
0017d9ad 1613 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1614
af0f7428
S
1615 def can_merge():
1616 merger = FFmpegMergerPP(self)
1617 return merger.available and merger.can_merge()
1618
91ebc640 1619 prefer_best = (
b7b04c78 1620 not self.params.get('simulate')
91ebc640 1621 and download
1622 and (
1623 not can_merge()
19807826 1624 or info_dict.get('is_live', False)
de6000d9 1625 or self.outtmpl_dict['default'] == '-'))
53ed7066 1626 compat = (
1627 prefer_best
1628 or self.params.get('allow_multiple_audio_streams', False)
1629 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1630
1631 return (
53ed7066 1632 'best/bestvideo+bestaudio' if prefer_best
1633 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1634 else 'bestvideo+bestaudio/best')
0017d9ad 1635
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized with the Python tokenizer and parsed
    into a tree of FormatSelector nodes using these operators:

    * ``a,b``   - select both a and b
    * ``a/b``   - select a, or b if a yields nothing
    * ``a+b``   - merge every combination of a and b
    * ``(...)`` - grouping
    * ``[...]`` - a filter handed to self._build_format_filter

    The returned callable takes a context dict with the keys 'formats' and
    'incomplete_formats' and yields (or returns a list of) the selected
    format dicts.

    Raises SyntaxError when the specification cannot be parsed.
    """
    def syntax_error(note, start):
        # Build (not raise) an error pointing at the offending column
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect the raw text of everything up to the closing ']'
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; the inside_* flags tell a nested call which
        # operators belong to its caller and must be pushed back unconsumed
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being iterated:
            # removing items mid-iteration made the loop skip the element that
            # followed each removed one, letting extra streams slip through
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    # Not rejected for either stream kind
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Yield the formats unchanged, or only those that pass a test download
        # when the 'check_formats' param is truthy
        if not check_formats:
            yield from formats
            return
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            temp_file = tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a callable over a context dict
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the bracket filters on a private copy before selecting
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that supports un-reading the last token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1966
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Assemble the HTTP headers to use for the given info dict.

    Starts from a copy of std_headers, overlays the info dict's own
    'http_headers', sets 'Cookie' when _calc_cookies returns something,
    and adds 'X-Forwarded-For' from '__x_forwarded_for_ip' unless that
    header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly supplied X-Forwarded-For header takes precedence
    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers
1984
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar produces for info_dict['url'].

    A throwaway request for the URL is handed to the cookie jar's
    add_cookie_header and the resulting header is read back from it.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1989
b0249bca 1990 def _sanitize_thumbnails(self, info_dict):
bc516a3f 1991 thumbnails = info_dict.get('thumbnails')
1992 if thumbnails is None:
1993 thumbnail = info_dict.get('thumbnail')
1994 if thumbnail:
1995 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1996 if thumbnails:
1997 thumbnails.sort(key=lambda t: (
1998 t.get('preference') if t.get('preference') is not None else -1,
1999 t.get('width') if t.get('width') is not None else -1,
2000 t.get('height') if t.get('height') is not None else -1,
2001 t.get('id') if t.get('id') is not None else '',
2002 t.get('url')))
b0249bca 2003
0ba692ac 2004 def thumbnail_tester():
2005 if self.params.get('check_formats'):
cca80fe6 2006 test_all = True
2007 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 2008 else:
cca80fe6 2009 test_all = False
0ba692ac 2010 to_screen = self.write_debug
2011
2012 def test_thumbnail(t):
cca80fe6 2013 if not test_all and not t.get('_test_url'):
2014 return True
0ba692ac 2015 to_screen('Testing thumbnail %s' % t['id'])
2016 try:
2017 self.urlopen(HEADRequest(t['url']))
2018 except network_exceptions as err:
2019 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2020 t['id'], t['url'], error_to_compat_str(err)))
2021 return False
2022 return True
2023
2024 return test_thumbnail
b0249bca 2025
bc516a3f 2026 for i, t in enumerate(thumbnails):
bc516a3f 2027 if t.get('id') is None:
2028 t['id'] = '%d' % i
b0249bca 2029 if t.get('width') and t.get('height'):
2030 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2031 t['url'] = sanitize_url(t['url'])
0ba692ac 2032
2033 if self.params.get('check_formats') is not False:
2034 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2035 else:
2036 info_dict['thumbnails'] = thumbnails
bc516a3f 2037
dd82ffea
JMF
2038 def process_video_result(self, info_dict, download=True):
2039 assert info_dict.get('_type', 'video') == 'video'
2040
bec1fad2
PH
2041 if 'id' not in info_dict:
2042 raise ExtractorError('Missing "id" field in extractor result')
2043 if 'title' not in info_dict:
2044 raise ExtractorError('Missing "title" field in extractor result')
2045
c9969434
S
2046 def report_force_conversion(field, field_not, conversion):
2047 self.report_warning(
2048 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2049 % (field, field_not, conversion))
2050
2051 def sanitize_string_field(info, string_field):
2052 field = info.get(string_field)
2053 if field is None or isinstance(field, compat_str):
2054 return
2055 report_force_conversion(string_field, 'a string', 'string')
2056 info[string_field] = compat_str(field)
2057
2058 def sanitize_numeric_fields(info):
2059 for numeric_field in self._NUMERIC_FIELDS:
2060 field = info.get(numeric_field)
2061 if field is None or isinstance(field, compat_numeric_types):
2062 continue
2063 report_force_conversion(numeric_field, 'numeric', 'int')
2064 info[numeric_field] = int_or_none(field)
2065
2066 sanitize_string_field(info_dict, 'id')
2067 sanitize_numeric_fields(info_dict)
be6217b2 2068
dd82ffea
JMF
2069 if 'playlist' not in info_dict:
2070 # It isn't part of a playlist
2071 info_dict['playlist'] = None
2072 info_dict['playlist_index'] = None
2073
bc516a3f 2074 self._sanitize_thumbnails(info_dict)
d5519808 2075
536a55da 2076 thumbnail = info_dict.get('thumbnail')
bc516a3f 2077 thumbnails = info_dict.get('thumbnails')
536a55da
S
2078 if thumbnail:
2079 info_dict['thumbnail'] = sanitize_url(thumbnail)
2080 elif thumbnails:
d5519808
PH
2081 info_dict['thumbnail'] = thumbnails[-1]['url']
2082
ae30b840 2083 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2084 info_dict['display_id'] = info_dict['id']
2085
10db0d2f 2086 for ts_key, date_key in (
2087 ('timestamp', 'upload_date'),
2088 ('release_timestamp', 'release_date'),
2089 ):
2090 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2091 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2092 # see http://bugs.python.org/issue1646728)
2093 try:
2094 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2095 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2096 except (ValueError, OverflowError, OSError):
2097 pass
9d2ecdbc 2098
ae30b840 2099 live_keys = ('is_live', 'was_live')
2100 live_status = info_dict.get('live_status')
2101 if live_status is None:
2102 for key in live_keys:
2103 if info_dict.get(key) is False:
2104 continue
2105 if info_dict.get(key):
2106 live_status = key
2107 break
2108 if all(info_dict.get(key) is False for key in live_keys):
2109 live_status = 'not_live'
2110 if live_status:
2111 info_dict['live_status'] = live_status
2112 for key in live_keys:
2113 if info_dict.get(key) is None:
2114 info_dict[key] = (live_status == key)
2115
33d2fc2f
S
2116 # Auto generate title fields corresponding to the *_number fields when missing
2117 # in order to always have clean titles. This is very common for TV series.
2118 for field in ('chapter', 'season', 'episode'):
2119 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2120 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2121
05108a49
S
2122 for cc_kind in ('subtitles', 'automatic_captions'):
2123 cc = info_dict.get(cc_kind)
2124 if cc:
2125 for _, subtitle in cc.items():
2126 for subtitle_format in subtitle:
2127 if subtitle_format.get('url'):
2128 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2129 if subtitle_format.get('ext') is None:
2130 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2131
2132 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2133 subtitles = info_dict.get('subtitles')
4bba3716 2134
360e1ca5 2135 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2136 info_dict['id'], subtitles, automatic_captions)
a504ced0 2137
dd82ffea
JMF
2138 # We now pick which formats have to be downloaded
2139 if info_dict.get('formats') is None:
2140 # There's only one format available
2141 formats = [info_dict]
2142 else:
2143 formats = info_dict['formats']
2144
db95dc13 2145 if not formats:
b7da73eb 2146 if not self.params.get('ignore_no_formats_error'):
2147 raise ExtractorError('No video formats found!')
2148 else:
2149 self.report_warning('No video formats found!')
db95dc13 2150
73af5cc8
S
2151 def is_wellformed(f):
2152 url = f.get('url')
a5ac0c47 2153 if not url:
73af5cc8
S
2154 self.report_warning(
2155 '"url" field is missing or empty - skipping format, '
2156 'there is an error in extractor')
a5ac0c47
S
2157 return False
2158 if isinstance(url, bytes):
2159 sanitize_string_field(f, 'url')
2160 return True
73af5cc8
S
2161
2162 # Filter out malformed formats for better extraction robustness
2163 formats = list(filter(is_wellformed, formats))
2164
181c7053
S
2165 formats_dict = {}
2166
dd82ffea 2167 # We check that all the formats have the format and format_id fields
db95dc13 2168 for i, format in enumerate(formats):
c9969434
S
2169 sanitize_string_field(format, 'format_id')
2170 sanitize_numeric_fields(format)
dcf77cf1 2171 format['url'] = sanitize_url(format['url'])
e74e3b63 2172 if not format.get('format_id'):
8016c922 2173 format['format_id'] = compat_str(i)
e2effb08
S
2174 else:
2175 # Sanitize format_id from characters used in format selector expression
ec85ded8 2176 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2177 format_id = format['format_id']
2178 if format_id not in formats_dict:
2179 formats_dict[format_id] = []
2180 formats_dict[format_id].append(format)
2181
2182 # Make sure all formats have unique format_id
2183 for format_id, ambiguous_formats in formats_dict.items():
2184 if len(ambiguous_formats) > 1:
2185 for i, format in enumerate(ambiguous_formats):
2186 format['format_id'] = '%s-%d' % (format_id, i)
2187
2188 for i, format in enumerate(formats):
8c51aa65 2189 if format.get('format') is None:
6febd1c1 2190 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2191 id=format['format_id'],
2192 res=self.format_resolution(format),
6febd1c1 2193 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 2194 )
c1002e96 2195 # Automatically determine file extension if missing
5b1d8575 2196 if format.get('ext') is None:
cce929ea 2197 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2198 # Automatically determine protocol if missing (useful for format
2199 # selection purposes)
6f0be937 2200 if format.get('protocol') is None:
b5559424 2201 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2202 # Add HTTP headers, so that external programs can use them from the
2203 # json output
2204 full_format_info = info_dict.copy()
2205 full_format_info.update(format)
2206 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2207 # Remove private housekeeping stuff
2208 if '__x_forwarded_for_ip' in info_dict:
2209 del info_dict['__x_forwarded_for_ip']
dd82ffea 2210
4bcc7bd1 2211 # TODO Central sorting goes here
99e206d5 2212
b7da73eb 2213 if formats and formats[0] is not info_dict:
b3d9ef88
JMF
2214 # only set the 'formats' fields if the original info_dict list them
2215 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2216 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2217 # which can't be exported to json
b3d9ef88 2218 info_dict['formats'] = formats
4ec82a72 2219
2220 info_dict, _ = self.pre_process(info_dict)
2221
b7b04c78 2222 if self.params.get('list_thumbnails'):
2223 self.list_thumbnails(info_dict)
2224 if self.params.get('listformats'):
2225 if not info_dict.get('formats'):
2226 raise ExtractorError('No video formats found', expected=True)
2227 self.list_formats(info_dict)
2228 if self.params.get('listsubtitles'):
2229 if 'automatic_captions' in info_dict:
2230 self.list_subtitles(
2231 info_dict['id'], automatic_captions, 'automatic captions')
2232 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2233 list_only = self.params.get('simulate') is None and (
2234 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2235 if list_only:
b7b04c78 2236 # Without this printing, -F --print-json will not work
169dbde9 2237 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2238 return
2239
187986a8 2240 format_selector = self.format_selector
2241 if format_selector is None:
0017d9ad 2242 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2243 self.write_debug('Default format spec: %s' % req_format)
187986a8 2244 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2245
2246 # While in format selection we may need to have an access to the original
2247 # format set in order to calculate some metrics or do some processing.
2248 # For now we need to be able to guess whether original formats provided
2249 # by extractor are incomplete or not (i.e. whether extractor provides only
2250 # video-only or audio-only formats) for proper formats selection for
2251 # extractors with such incomplete formats (see
067aa17e 2252 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2253 # Since formats may be filtered during format selection and may not match
2254 # the original formats the results may be incorrect. Thus original formats
2255 # or pre-calculated metrics should be passed to format selection routines
2256 # as well.
2257 # We will pass a context object containing all necessary additional data
2258 # instead of just formats.
2259 # This fixes incorrect format selection issue (see
067aa17e 2260 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2261 incomplete_formats = (
317f7ab6 2262 # All formats are video-only or
3089bc74 2263 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2264 # all formats are audio-only
3089bc74 2265 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2266
2267 ctx = {
2268 'formats': formats,
2269 'incomplete_formats': incomplete_formats,
2270 }
2271
2272 formats_to_download = list(format_selector(ctx))
dd82ffea 2273 if not formats_to_download:
b7da73eb 2274 if not self.params.get('ignore_no_formats_error'):
2275 raise ExtractorError('Requested format is not available', expected=True)
2276 else:
2277 self.report_warning('Requested format is not available')
4513a41a
A
2278 # Process what we can, even without any available formats.
2279 self.process_info(dict(info_dict))
b7da73eb 2280 elif download:
2281 self.to_screen(
07cce701 2282 '[info] %s: Downloading %d format(s): %s' % (
2283 info_dict['id'], len(formats_to_download),
2284 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2285 for fmt in formats_to_download:
dd82ffea 2286 new_info = dict(info_dict)
4ec82a72 2287 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2288 new_info['__original_infodict'] = info_dict
b7da73eb 2289 new_info.update(fmt)
dd82ffea
JMF
2290 self.process_info(new_info)
2291 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2292 if formats_to_download:
2293 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2294 return info_dict
2295
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id            Used only in warning messages
    @param normal_subtitles    Mapping of language -> list of format dicts
    @param automatic_captions  Mapping of language -> list of format dicts;
                               only used for languages that have no normal
                               subtitles
    @returns  None when no subtitles were requested or none are available,
              otherwise a dict mapping each selected language to the chosen
              format dict. Languages appear in the order in which they were
              requested through "subtitleslangs".
    """
    write_subs = self.params.get('writesubtitles')
    write_auto = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and write_subs:
        available_subs.update(normal_subtitles)
    if automatic_captions and write_auto:
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not write_subs and not write_auto) or not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A dict is used as an insertion-ordered set so that the selection
        # follows the order given by the user instead of string hash order
        selected = {}
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                selected.update(dict.fromkeys(all_sub_langs))
                continue
            # A leading "-" removes the matching languages from the selection
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            matches = re.compile(lang_re + '$').match
            for lang in filter(matches, all_sub_langs):
                if discard:
                    selected.pop(lang, None)
                else:
                    selected.setdefault(lang)
        requested_langs = list(selected)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is treated like a missing language; indexing it
        # with [-1] below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions exist; fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2358
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the "force*" and "forceprint" params to stdout

    Works on a shallow copy of info_dict, to which "filename" and "urls"
    are added. When incomplete is true, mandatory fields whose value is
    None are skipped silently instead of being printed.
    """
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        key = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info_dict.get(key) is None:
            return
        self.to_stdout(info_dict[key])

    def print_optional(field):
        if not is_forced(field):
            return
        if info_dict.get(field) is None:
            return
        self.to_stdout(info_dict[field])

    for tmpl in self.params.get('forceprint', []):
        # A bare field name is shorthand for the "%(field)s" template
        if re.match(r'\w+$', tmpl):
            tmpl = '%({})s'.format(tmpl)
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
        self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)

    for field, actual_field in (('title', None), ('id', None), ('url', 'urls')):
        print_mandatory(field, actual_field)
    for field in ('thumbnail', 'description', 'filename'):
        print_optional(field)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson', False):
        self.post_extract(info_dict)
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2400
def dl(self, name, info, subtitle=False, test=False):
    """Pick a downloader for info and run it, returning the downloader's result

    With test=True a fixed set of test parameters is used instead of
    self.params and no progress hooks are attached.
    """
    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

    # Only used for the debug message below
    format_urls = [f['url'] for f in info.get('requested_formats', [])]
    urls = '", "'.join(format_urls or [info['url']])
    self.write_debug('Invoking downloader on "%s"' % urls)

    # Work on a copy so that the caller's dict does not gain "http_headers"
    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2428
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: runs the forced printings, stops early in simulate mode,
    writes the requested side files (description, annotations, subtitles,
    info json, thumbnails, internet shortcuts), runs the "before_dl"
    preprocessors, downloads the media unless "skip_download" is set,
    runs the postprocessors and post hooks, and finally records the video
    in the download archive.

    Most failures are reported through self.report_error followed by a
    plain return. Raises MaxDownloadsReached when the download limit is hit
    and UnavailableVideoError when the download fails with OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Returns False only when writing the shortcut failed
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # An existing shortcut is kept only when overwriting is disabled,
        # like every other side file written by this method
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                # Returns the path of an already downloaded file that should
                # be reused, or None when a fresh download is needed
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:  # All requested formats have same protocol
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = FFmpegFD.can_merge_formats(info_dict)
                if dl_filename is not None:
                    pass
                elif (directly_mergable and get_suitable_downloader(
                        info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if directly_mergable
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queues the ffmpeg fixup postprocessors that apply to this
                # download, or only warns, depending on the "fixup" param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2845
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.outtmpl_dict['default']
    # Several URLs written to one fixed file name would overwrite each other
    writes_to_same_file = (
        len(url_list) > 1
        and outtmpl != '-'
        and '%' not in outtmpl
        and self.params.get('max_downloads') != 1)
    if writes_to_same_file:
        raise SameFileError(outtmpl)

    force_generic = 'force_generic_extractor'
    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get(force_generic, False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        if not self.params.get('dump_single_json', False):
            continue
        self.post_extract(res)
        self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
2877
def download_with_info_file(self, info_filename):
    """Download using the info dict stored as JSON in info_filename

    If processing the stored info fails with one of the handled download
    errors, the download is retried from the "webpage_url" recorded in the
    file; when there is no such URL the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        stored_info = json.loads('\n'.join(lines))
        info = self.sanitize_info(stored_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2894
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Sets info_dict['epoch'] to the current time if it is missing (the
    passed dict is modified in place) and returns a filtered copy.
    "__original_infodict" is always dropped. With remove_private_keys,
    the keys listed below, keys starting with "_" (except "_type") and
    keys with empty values are dropped as well. The filtering is applied
    recursively to nested dicts and sequences.
    '''
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    # Always keep this to facilitate load-info-json.
    # NOTE: this must be a flat collection of key names; a trailing comma
    # here used to wrap it in a tuple so that '_type' was never matched
    keep_keys = {'_type'}
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())
        reject = lambda k, v: k not in keep_keys and (
            k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        reject = lambda k, v: k in remove_keys
    filter_fn = lambda obj: (
        list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
        else obj if not isinstance(obj, dict)
        else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
    return filter_fn(info_dict)
cb202fd2 2916
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Backward-compatible alias of sanitize_info; actually_filter is passed as remove_private_keys '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
2921
def run_pp(self, pp, infodict):
    """Run one postprocessor and deal with the files it wants deleted

    pp.run() returns (files_to_delete, infodict). With the "keepvideo"
    param those files are registered in infodict['__files_to_move'];
    otherwise they are removed from disk and from that mapping.
    Returns the infodict produced by the postprocessor.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for path in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # The file must not be moved later, whether or not removal succeeded
        infodict['__files_to_move'].pop(path, None)
    return infodict
5bfa4862 2943
@staticmethod
def post_extract(info_dict):
    """Run the deferred "__post_extractor" callbacks and merge their results

    For playlist and multi_video results only the entries are processed,
    recursively. For any other result the dict returned by the callback is
    merged into the info dict and into its "__original_infodict" (if any),
    and the callback is removed from both.
    """
    def merge_extra_fields(video):
        callback = video.get('__post_extractor') or (lambda: {})
        extra = callback().items()
        video.update(extra)
        video.pop('__post_extractor', None)

        source = video.get('__original_infodict') or {}
        source.update(extra)
        source.pop('__post_extractor', None)

    def walk(result):
        if result.get('_type') not in ('playlist', 'multi_video'):
            merge_extra_fields(result)
            return
        for entry in result.get('entries', {}):
            walk(entry or {})

    walk(info_dict or {})
277d6ff5 2962
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of ie_info.

    Returns a tuple `(info, files_to_move)`, where `files_to_move` is the
    `__files_to_move` entry popped from the resulting info dict.
    """
    working = dict(ie_info, __files_to_move=files_to_move or {})
    for processor in self._pps[key]:
        working = self.run_pp(processor, working)
    pending_moves = working.pop('__files_to_move', None)
    return working, pending_moves
5bfa4862 2969
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the per-video `__postprocessors` followed by the registered
    `post_process` ones, then MoveFilesAfterDownloadPP, then the
    `after_move` ones. Returns the resulting info dict, which no longer
    contains `__files_to_move`.
    """
    state = dict(ie_info)
    state.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        state = self.run_pp(processor, state)

    mover = MoveFilesAfterDownloadPP(self)
    state = self.run_pp(mover, state)
    del state['__files_to_move']

    for processor in self._pps['after_move']:
        state = self.run_pp(processor, state)
    return state
c1c9a79c 2983
5db07df6 2984 def _make_archive_id(self, info_dict):
e9fef7ee
S
2985 video_id = info_dict.get('id')
2986 if not video_id:
2987 return
5db07df6
PH
2988 # Future-proof against any change in case
2989 # and backwards compatibility with prior versions
e9fef7ee 2990 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2991 if extractor is None:
1211bb6d
S
2992 url = str_or_none(info_dict.get('url'))
2993 if not url:
2994 return
e9fef7ee
S
2995 # Try to find matching extractor for the URL and take its ie_key
2996 for ie in self._ies:
1211bb6d 2997 if ie.suitable(url):
e9fef7ee
S
2998 extractor = ie.ie_key()
2999 break
3000 else:
3001 return
d0757229 3002 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3003
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is in the archive.

    Returns False when no `download_archive` param is configured or when
    no archive id can be built for the video.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A falsy id means the video information is incomplete
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
3014
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and to self.archive.

    Does nothing when no `download_archive` param is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    entry = self._make_archive_id(info_dict)
    assert entry
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + '\n')
    self.archive.add(entry)
dd82ffea 3024
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Formats with `vcodec == 'none'` are reported as 'images' (when the
    `acodec` is 'none' as well) or 'audio only'. Otherwise an explicit
    `resolution` wins, then width/height are used, and `default` is
    returned when neither is available.
    """
    if format.get('vcodec') == 'none':
        return 'images' if format.get('acodec') == 'none' else 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
3042
c57f7757
PH
3043 def _format_note(self, fdict):
3044 res = ''
3045 if fdict.get('ext') in ['f4f', 'f4m']:
3046 res += '(unsupported) '
32f90364
PH
3047 if fdict.get('language'):
3048 if res:
3049 res += ' '
9016d76f 3050 res += '[%s] ' % fdict['language']
c57f7757
PH
3051 if fdict.get('format_note') is not None:
3052 res += fdict['format_note'] + ' '
3053 if fdict.get('tbr') is not None:
3054 res += '%4dk ' % fdict['tbr']
3055 if fdict.get('container') is not None:
3056 if res:
3057 res += ', '
3058 res += '%s container' % fdict['container']
3089bc74
S
3059 if (fdict.get('vcodec') is not None
3060 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3061 if res:
3062 res += ', '
3063 res += fdict['vcodec']
91c7271a 3064 if fdict.get('vbr') is not None:
c57f7757
PH
3065 res += '@'
3066 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3067 res += 'video@'
3068 if fdict.get('vbr') is not None:
3069 res += '%4dk' % fdict['vbr']
fbb21cf5 3070 if fdict.get('fps') is not None:
5d583bdf
S
3071 if res:
3072 res += ', '
3073 res += '%sfps' % fdict['fps']
c57f7757
PH
3074 if fdict.get('acodec') is not None:
3075 if res:
3076 res += ', '
3077 if fdict['acodec'] == 'none':
3078 res += 'video only'
3079 else:
3080 res += '%-5s' % fdict['acodec']
3081 elif fdict.get('abr') is not None:
3082 if res:
3083 res += ', '
3084 res += 'audio'
3085 if fdict.get('abr') is not None:
3086 res += '@%3dk' % fdict['abr']
3087 if fdict.get('asr') is not None:
3088 res += ' (%5dHz)' % fdict['asr']
3089 if fdict.get('filesize') is not None:
3090 if res:
3091 res += ', '
3092 res += format_bytes(fdict['filesize'])
9732d77e
PH
3093 elif fdict.get('filesize_approx') is not None:
3094 if res:
3095 res += ', '
3096 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3097 return res
91c7271a 3098
def list_formats(self, info_dict):
    """Print the available formats of a video as a table.

    Formats whose `preference` is below -1000 are not listed. The detailed
    table layout is used unless the 'list-formats' compat option is set or
    the `listformats_table` param is False; otherwise the four-column
    layout with a free-text note is printed.
    """
    def is_listable(fmt):
        return fmt.get('preference') is None or fmt['preference'] >= -1000

    def detailed_row(fmt):
        row = [
            format_field(fmt, 'format_id'),
            format_field(fmt, 'ext'),
            self.format_resolution(fmt),
            format_field(fmt, 'fps', '%d'),
            '|',
            format_field(fmt, 'filesize', ' %s', func=format_bytes) + format_field(fmt, 'filesize_approx', '~%s', func=format_bytes),
            format_field(fmt, 'tbr', '%4dk'),
            shorten_protocol_name(fmt.get('protocol', '').replace("native", "n")),
            '|',
            format_field(fmt, 'vcodec', default='unknown').replace('none', ''),
            format_field(fmt, 'vbr', '%4dk'),
            format_field(fmt, 'acodec', default='unknown').replace('none', ''),
            format_field(fmt, 'abr', '%3dk'),
            format_field(fmt, 'asr', '%5dHz'),
        ]
        # Last column: comma-separated extras, empty ones dropped
        row.append(', '.join(filter(None, (
            'UNSUPPORTED' if fmt.get('ext') in ('f4f', 'f4m') else '',
            format_field(fmt, 'language', '[%s]'),
            format_field(fmt, 'format_note'),
            format_field(fmt, 'container', ignore=(None, fmt.get('ext'))),
        ))))
        return row

    def compat_row(fmt):
        return [
            format_field(fmt, 'format_id'),
            format_field(fmt, 'ext'),
            self.format_resolution(fmt),
            self._format_note(fmt),
        ]

    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)

    if new_format:
        make_row = detailed_row
        header_line = [
            'ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
            '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        make_row = compat_row
        header_line = ['format code', 'extension', 'resolution', 'note']
    table = [make_row(fmt) for fmt in formats if is_listable(fmt)]

    self.to_screen('[info] Available formats for %s:' % info_dict['id'])
    rendered = render_table(
        header_line, table,
        delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format)
    self.to_stdout(rendered)
cfb56d1a
PH
3145
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails.

    When the info dict has no thumbnails (key missing, None or empty), a
    short notice is printed instead.
    """
    # `thumbnails` may be missing or explicitly None; list(None) would raise
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3157
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitle languages and formats.

    `subtitles` maps a language to a list of format dicts; `name` is the
    label used in the messages. A notice is printed when it is empty.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen('Available %s for %s:' % (name, video_id))

    def describe(lang, formats):
        # Formats are listed in reverse order of the input list
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # One shared name: show it once, or nothing if it is unknown
            names = names[:1] if names[0] != 'unknown' else []
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [describe(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(['Language', 'Name', 'Formats'], rows, hideEmpty=True))
a504ced0 3175
dca08720
PH
def urlopen(self, req):
    """Start an HTTP download.

    `req` may be a URL string, which is wrapped with sanitized_Request, or
    an already built request object. The request is opened through
    self._opener using self._socket_timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3181
def print_debug_header(self):
    """Write the '[debug]' header lines; a no-op unless `verbose` is set.

    Reports encodings, the yt-dlp version and how it is being run, lazy
    loading / plugin extractors / compatibility options, the git HEAD (when
    it can be determined), the Python version and platform, the versions of
    the external executables and the proxy map. With the `call_home` param
    the public IP address is fetched and printed as well.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may have been replaced by an object without `encoding`
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # How the program is being run: frozen executable, zip archive or source tree
    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    # Best effort: ask git for the revision of the directory containing this
    # file; any failure (git missing, not a checkout, ...) is silently ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # sys.exc_clear() only exists on Python 2; elsewhere the
            # resulting AttributeError is swallowed below
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Implementation name, plus the PyPy version when running on PyPy
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables with a truthy version are listed, sorted by name
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the proxies of every handler of the opener that exposes them
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # NOTE(review): the version check below is unreachable because of
        # this bare `return`; presumably it was disabled on purpose -
        # confirm before removing either the `return` or the dead code
        return
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3271
def _setup_opener(self):
    """Build the urllib opener used for requests and store it in self._opener.

    Also sets self._socket_timeout (600 when the `socket_timeout` param is
    unset) and self.cookiejar. The proxy configuration comes from the
    `proxy` param when given (an empty string disables proxies) and from
    the proxies reported by urllib otherwise.
    """
    params = self.params

    socket_timeout = params.get('socket_timeout')
    self._socket_timeout = float(socket_timeout) if socket_timeout is not None else 600

    browser_spec, cookie_file, explicit_proxy = (
        params.get('cookiesfrombrowser'), params.get('cookiefile'), params.get('proxy'))

    self.cookiejar = load_cookies(cookie_file, browser_spec, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if explicit_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif explicit_proxy == '':
        proxies = {}
    else:
        proxies = {'http': explicit_proxy, 'https': explicit_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3319
def encode(self, s):
    """Encode `s` with the encoding from get_encoding(); bytes pass through.

    A UnicodeEncodeError is re-raised with a hint about the encoding
    configuration appended to its `reason`.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as failure:
            failure.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
3329
def get_encoding(self):
    """Return the `encoding` param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3335
de6000d9 3336 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3337 write_all = self.params.get('write_all_thumbnails', False)
3338 thumbnails = []
3339 if write_all or self.params.get('writethumbnail', False):
0202b52a 3340 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3341 multiple = write_all and len(thumbnails) > 1
ec82d85a 3342
0202b52a 3343 ret = []
981052c9 3344 for t in thumbnails[::-1]:
ec82d85a 3345 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3346 suffix = '%s.' % t['id'] if multiple else ''
3347 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3348 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3349
0c3d0f51 3350 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3351 ret.append(suffix + thumb_ext)
8ba87148 3352 t['filepath'] = thumb_filename
ec82d85a
PH
3353 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3354 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3355 else:
5ef7d9bd 3356 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3357 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3358 try:
3359 uf = self.urlopen(t['url'])
d3d89c32 3360 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3361 shutil.copyfileobj(uf, thumbf)
de6000d9 3362 ret.append(suffix + thumb_ext)
ec82d85a
PH
3363 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3364 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3365 t['filepath'] = thumb_filename
3158150c 3366 except network_exceptions as err:
ec82d85a 3367 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3368 (t['url'], error_to_compat_str(err)))
6c4fd172 3369 if ret and not write_all:
3370 break
0202b52a 3371 return ret