]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[tests:download] Add batch testing for extractors
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474 29from string import ascii_letters
e5813e53 30from zipimport import zipimporter
961ea474 31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
7d1eb38a 38 compat_shlex_quote,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b 44)
982ee69a 45from .cookies import load_cookies
8c25f81b 46from .utils import (
eedb7ba5
S
47 age_restricted,
48 args_to_str,
ce02ed60
PH
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
acd69589 52 DEFAULT_OUTTMPL,
ce02ed60 53 determine_ext,
b5559424 54 determine_protocol,
732044af 55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 58 DownloadError,
c0384f22 59 encode_compat_str,
ce02ed60 60 encodeFilename,
498f5606 61 EntryNotInPlaylist,
a06916d9 62 error_to_compat_str,
8b0d7497 63 ExistingVideoReached,
590bc6f6 64 expand_path,
ce02ed60 65 ExtractorError,
e29663c6 66 float_or_none,
02dbf93f 67 format_bytes,
76d321f6 68 format_field,
901130bb 69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
525ef922 71 formatSeconds,
773f291d 72 GeoRestrictedError,
b0249bca 73 HEADRequest,
c9969434 74 int_or_none,
732044af 75 iri_to_uri,
773f291d 76 ISO3166Utils,
56a8fb4f 77 LazyList,
ce02ed60 78 locked_file,
0202b52a 79 make_dir,
dca08720 80 make_HTTPS_handler,
ce02ed60 81 MaxDownloadsReached,
3158150c 82 network_exceptions,
cd6fc19e 83 orderedSet,
a06916d9 84 OUTTMPL_TYPES,
b7ab0590 85 PagedList,
083c9df9 86 parse_filesize,
91410c9b 87 PerRequestProxyHandler,
dca08720 88 platform_name,
eedb7ba5 89 PostProcessingError,
ce02ed60 90 preferredencoding,
eedb7ba5 91 prepend_extension,
a06916d9 92 process_communicate_or_kill,
51fb4995 93 register_socks_protocols,
a06916d9 94 RejectedVideoReached,
cfb56d1a 95 render_table,
eedb7ba5 96 replace_extension,
ce02ed60
PH
97 SameFileError,
98 sanitize_filename,
1bb5c511 99 sanitize_path,
dcf77cf1 100 sanitize_url,
67dda517 101 sanitized_Request,
e5660ee6 102 std_headers,
1211bb6d 103 str_or_none,
e29663c6 104 strftime_or_none,
ce02ed60 105 subtitles_filename,
51d9739f 106 ThrottledDownload,
732044af 107 to_high_limit_path,
324ad820 108 traverse_obj,
6033d980 109 try_get,
ce02ed60 110 UnavailableVideoError,
29eb5174 111 url_basename,
7d1eb38a 112 variadic,
58b1f00d 113 version_tuple,
ce02ed60
PH
114 write_json_file,
115 write_string,
6a3f4c3f 116 YoutubeDLCookieProcessor,
dca08720 117 YoutubeDLHandler,
fca6dba8 118 YoutubeDLRedirectHandler,
ce02ed60 119)
a0e07d31 120from .cache import Cache
52a8a1e1 121from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126)
4c54b89e 127from .extractor.openload import PhantomJSwrapper
52a8a1e1 128from .downloader import (
dbf5416a 129 FFmpegFD,
52a8a1e1 130 get_suitable_downloader,
131 shorten_protocol_name
132)
4c83c967 133from .downloader.rtmp import rtmpdump_version
4f026faf 134from .postprocessor import (
e36d50c5 135 get_postprocessor,
136 FFmpegFixupDurationPP,
f17f8651 137 FFmpegFixupM3u8PP,
62cd676c 138 FFmpegFixupM4aPP,
6271f1ca 139 FFmpegFixupStretchedPP,
e36d50c5 140 FFmpegFixupTimestampPP,
4f026faf
PH
141 FFmpegMergerPP,
142 FFmpegPostProcessor,
0202b52a 143 MoveFilesAfterDownloadPP,
4f026faf 144)
dca08720 145from .version import __version__
8222d8de 146
e9c0cdd3
YCH
147if compat_os_name == 'nt':
148 import ctypes
149
2459b6e1 150
8222d8de
JMF
151class YoutubeDL(object):
152 """YoutubeDL class.
153
 154 YoutubeDL objects are the ones responsible for downloading the
155 actual video file and writing it to disk if the user has requested
156 it, among some other tasks. In most cases there should be one per
157 program. As, given a video URL, the downloader doesn't know how to
158 extract all the needed information, task that InfoExtractors do, it
159 has to pass the URL to one of them.
160
161 For this, YoutubeDL objects have a method that allows
162 InfoExtractors to be registered in a given order. When it is passed
 163 a URL, the YoutubeDL object hands it to the first InfoExtractor it
164 finds that reports being able to handle it. The InfoExtractor extracts
165 all the information about the video or videos the URL refers to, and
 166 YoutubeDL processes the extracted information, possibly using a File
167 Downloader to download the video.
168
169 YoutubeDL objects accept a lot of parameters. In order not to saturate
170 the object constructor with arguments, it receives a dictionary of
171 options instead. These options are available through the params
172 attribute for the InfoExtractors to use. The YoutubeDL also
173 registers itself as the downloader in charge for the InfoExtractors
174 that are added to it, so this is a "mutual registration".
175
176 Available options:
177
178 username: Username for authentication purposes.
179 password: Password for authentication purposes.
180940e0 180 videopassword: Password for accessing a video.
1da50aa3
S
181 ap_mso: Adobe Pass multiple-system operator identifier.
182 ap_username: Multiple-system operator account username.
183 ap_password: Multiple-system operator account password.
8222d8de
JMF
184 usenetrc: Use netrc for authentication instead.
185 verbose: Print additional info to stdout.
186 quiet: Do not print messages to stdout.
ad8915b7 187 no_warnings: Do not print out anything for warnings.
53c18592 188 forceprint: A list of templates to force print
189 forceurl: Force printing final URL. (Deprecated)
190 forcetitle: Force printing title. (Deprecated)
191 forceid: Force printing ID. (Deprecated)
192 forcethumbnail: Force printing thumbnail URL. (Deprecated)
193 forcedescription: Force printing description. (Deprecated)
194 forcefilename: Force printing final filename. (Deprecated)
195 forceduration: Force printing duration. (Deprecated)
8694c600 196 forcejson: Force printing info_dict as JSON.
63e0be34
PH
197 dump_single_json: Force printing the info_dict of the whole playlist
198 (or video) as a single JSON line.
c25228e5 199 force_write_download_archive: Force writing download archive regardless
200 of 'skip_download' or 'simulate'.
b7b04c78 201 simulate: Do not download the video files. If unset (or None),
202 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 203 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 205 ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
c25228e5 208 format_sort: How to sort the video formats. see "Sorting Formats"
209 for more details.
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
211 for more details.
212 allow_multiple_video_streams: Allow multiple video streams to be merged
213 into a single file
214 allow_multiple_audio_streams: Allow multiple audio streams to be merged
215 into a single file
0ba692ac 216 check_formats Whether to test if the formats are downloadable.
217 Can be True (check all), False (check none)
218 or None (check only if requested by extractor)
4524baf0 219 paths: Dictionary of output paths. The allowed keys are 'home'
220 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 221 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 222 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 223 A string is also accepted for backward compatibility
a820dc72
RA
224 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
225 restrictfilenames: Do not allow "&" and spaces in file names
226 trim_file_name: Limit length of filename (extension excluded)
4524baf0 227 windowsfilenames: Force the filenames to be windows compatible
a820dc72 228 ignoreerrors: Do not stop on download errors
7a5c1cfe 229 (Default True when running yt-dlp,
a820dc72 230 but False when directly accessing YoutubeDL class)
26e2805c 231 skip_playlist_after_errors: Number of allowed failures until the rest of
232 the playlist is skipped
d22dec74 233 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 234 overwrites: Overwrite all video and metadata files if True,
235 overwrite only non-video files if None
236 and don't overwrite any file if False
8222d8de
JMF
237 playliststart: Playlist item to start at.
238 playlistend: Playlist item to end at.
c14e88f0 239 playlist_items: Specific indices of playlist to download.
ff815fe6 240 playlistreverse: Download playlist items in reverse order.
75822ca7 241 playlistrandom: Download playlist items in random order.
8222d8de
JMF
242 matchtitle: Download only matching titles.
243 rejecttitle: Reject downloads for matching titles.
8bf9319e 244 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
245 logtostderr: Log messages to stderr instead of stdout.
246 writedescription: Write the video description to a .description file
 247 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 248 clean_infojson: Remove private fields from the infojson
06167fbb 249 writecomments: Extract video comments. This will not be written to disk
250 unless writeinfojson is also given
1fb07d10 251 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 252 writethumbnail: Write the thumbnail image to a file
c25228e5 253 allow_playlist_files: Whether to write playlists' description, infojson etc
254 also to disk when using the 'write*' options
ec82d85a 255 write_all_thumbnails: Write all thumbnail formats to files
732044af 256 writelink: Write an internet shortcut file, depending on the
257 current platform (.url/.webloc/.desktop)
258 writeurllink: Write a Windows internet shortcut file (.url)
259 writewebloclink: Write a macOS internet shortcut file (.webloc)
260 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 261 writesubtitles: Write the video subtitles to a file
741dd8ea 262 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 263 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 264 Downloads all the subtitles of the video
0b7f3118 265 (requires writesubtitles or writeautomaticsub)
8222d8de 266 listsubtitles: Lists all available subtitles for the video
a504ced0 267 subtitlesformat: The format code for subtitles
c32b0aab 268 subtitleslangs: List of languages of the subtitles to download (can be regex).
269 The list may contain "all" to refer to all the available
270 subtitles. The language can be prefixed with a "-" to
271 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
272 keepvideo: Keep the video file after post-processing
273 daterange: A DateRange object, download only if the upload_date is in the range.
274 skip_download: Skip the actual download of the video file
c35f9e72 275 cachedir: Location of the cache files in the filesystem.
a0e07d31 276 False to disable filesystem cache.
47192f92 277 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
278 age_limit: An integer representing the user's age in years.
279 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
280 min_views: An integer representing the minimum view count the video
281 must have in order to not be skipped.
282 Videos without view count information are always
283 downloaded. None for no limit.
284 max_views: An integer representing the maximum view count.
285 Videos that are more popular than that are not
286 downloaded.
287 Videos without view count information are always
288 downloaded. None for no limit.
289 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
290 Videos already present in the file are not downloaded
291 again.
8a51f564 292 break_on_existing: Stop the download process after attempting to download a
293 file that is in the archive.
294 break_on_reject: Stop the download process when encountering a video that
295 has been filtered out.
296 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
297 cookiesfrombrowser: A tuple containing the name of the browser and the profile
298 name/path from where cookies are loaded.
 299 Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 300 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
301 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
302 At the moment, this is only supported by YouTube.
a1ee09e8 303 proxy: URL of the proxy server to use
38cce791 304 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 305 on geo-restricted sites.
e344693b 306 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
307 bidi_workaround: Work around buggy terminals without bidirectional text
 308 support, using fribidi
a0ddb8a2 309 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 310 include_ads: Download ads as well
04b4d394
PH
311 default_search: Prepend this string if an input url is not valid.
312 'auto' for elaborate guessing
62fec3b2 313 encoding: Use this encoding instead of the system-specified.
e8ee972c 314 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
315 Pass in 'in_playlist' to only show this behavior for
316 playlist items.
4f026faf 317 postprocessors: A list of dictionaries, each with an entry
71b640cc 318 * key: The name of the postprocessor. See
7a5c1cfe 319 yt_dlp/postprocessor/__init__.py for a list.
56d868db 320 * when: When to run the postprocessor. Can be one of
321 pre_process|before_dl|post_process|after_move.
322 Assumed to be 'post_process' if not given
ab8e5e51
AM
323 post_hooks: A list of functions that get called as the final step
324 for each video file, after all postprocessors have been
325 called. The filename will be passed as the only argument.
71b640cc
PH
326 progress_hooks: A list of functions that get called on download
327 progress, with a dictionary with the entries
5cda4eda 328 * status: One of "downloading", "error", or "finished".
ee69b99a 329 Check this first and ignore unknown values.
3ba7740d 330 * info_dict: The extracted info_dict
71b640cc 331
5cda4eda 332 If status is one of "downloading", or "finished", the
ee69b99a
PH
333 following properties may also be present:
334 * filename: The final filename (always present)
5cda4eda 335 * tmpfilename: The filename we're currently writing to
71b640cc
PH
336 * downloaded_bytes: Bytes on disk
337 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
338 * total_bytes_estimate: Guess of the eventual file size,
339 None if unavailable.
340 * elapsed: The number of seconds since download started.
71b640cc
PH
341 * eta: The estimated time in seconds, None if unknown
342 * speed: The download speed in bytes/second, None if
343 unknown
5cda4eda
PH
344 * fragment_index: The counter of the currently
345 downloaded video fragment.
346 * fragment_count: The number of fragments (= individual
347 files that will be merged)
71b640cc
PH
348
349 Progress hooks are guaranteed to be called at least once
350 (with status "finished") if the download is successful.
45598f15 351 merge_output_format: Extension to use when merging formats.
6b591b29 352 final_ext: Expected final extension; used to detect when the file was
353 already downloaded and converted. "merge_output_format" is
354 replaced by this extension when given
6271f1ca
PH
355 fixup: Automatically correct known faults of the file.
356 One of:
357 - "never": do nothing
358 - "warn": only emit a warning
359 - "detect_or_warn": check whether we can do anything
62cd676c 360 about it, warn otherwise (default)
504f20dd 361 source_address: Client-side IP address to bind to.
6ec6cb4e 362 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 363 yt-dlp servers for debugging. (BROKEN)
1cf376f5 364 sleep_interval_requests: Number of seconds to sleep between requests
365 during extraction
7aa589a5
S
366 sleep_interval: Number of seconds to sleep before each download when
367 used alone or a lower bound of a range for randomized
368 sleep before each download (minimum possible number
369 of seconds to sleep) when used along with
370 max_sleep_interval.
371 max_sleep_interval:Upper bound of a range for randomized sleep before each
372 download (maximum possible number of seconds to sleep).
373 Must only be used along with sleep_interval.
374 Actual sleep time will be a random float from range
375 [sleep_interval; max_sleep_interval].
1cf376f5 376 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
377 listformats: Print an overview of available video formats and exit.
378 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
379 match_filter: A function that gets called with the info_dict of
380 every video.
381 If it returns a message, the video is ignored.
382 If it returns None, the video is downloaded.
383 match_filter_func in utils.py is one example for this.
7e5db8c9 384 no_color: Do not emit color codes in output.
0a840f58 385 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 386 HTTP header
0a840f58 387 geo_bypass_country:
773f291d
S
388 Two-letter ISO 3166-2 country code that will be used for
389 explicit geographic restriction bypassing via faking
504f20dd 390 X-Forwarded-For HTTP header
5f95927a
S
391 geo_bypass_ip_block:
392 IP range in CIDR notation that will be used similarly to
504f20dd 393 geo_bypass_country
71b640cc 394
85729c51 395 The following options determine which downloader is picked:
52a8a1e1 396 external_downloader: A dictionary of protocol keys and the executable of the
397 external downloader to use for it. The allowed protocols
398 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
399 Set the value to 'native' to use the native downloader
400 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
401 or {'m3u8': 'ffmpeg'} instead.
402 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
403 if True, otherwise use ffmpeg/avconv if False, otherwise
404 use downloader suggested by extractor if None.
53ed7066 405 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 406 The following options do not work when used through the API:
407 filename, abort-on-error, multistreams, no-live-chat,
b51d2ae3 408 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 409 Refer __init__.py for their implementation
fe7e0c98 410
8222d8de 411 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 412 the downloader (see yt_dlp/downloader/common.py):
51d9739f 413 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
414 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
415 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67
JMF
416
417 The following options are used by the post processors:
d4a24f40 418 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 419 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
420 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
421 to the binary or its containing directory.
43820c03 422 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
423 and a list of additional command-line arguments for the
424 postprocessor/executable. The dict can also have "PP+EXE" keys
425 which are used when the given exe is used by the given PP.
 426 Use 'default' as the name for arguments to be passed to all PP
e409895f 427
428 The following options are used by the extractors:
62bff2c1 429 extractor_retries: Number of times to retry for known errors
430 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 431 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 432 discontinuities such as ad breaks (default: False)
5d3a0e79 433 extractor_args: A dictionary of arguments to be passed to the extractors.
434 See "EXTRACTOR ARGUMENTS" for details.
435 Eg: {'youtube': {'skip': ['dash', 'hls']}}
436 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
437 If True (default), DASH manifests and related
62bff2c1 438 data will be downloaded and processed by extractor.
439 You can reduce network I/O by disabling it if you don't
440 care about DASH. (only for youtube)
5d3a0e79 441 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
442 If True (default), HLS manifests and related
62bff2c1 443 data will be downloaded and processed by extractor.
444 You can reduce network I/O by disabling it if you don't
445 care about HLS. (only for youtube)
8222d8de
JMF
446 """
447
c9969434
S
448 _NUMERIC_FIELDS = set((
449 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
450 'timestamp', 'upload_year', 'upload_month', 'upload_day',
451 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
452 'average_rating', 'comment_count', 'age_limit',
453 'start_time', 'end_time',
454 'chapter_number', 'season_number', 'episode_number',
455 'track_number', 'disc_number', 'release_year',
456 'playlist_index',
457 ))
458
8222d8de
JMF
459 params = None
460 _ies = []
56d868db 461 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 462 _printed_messages = set()
1cf376f5 463 _first_webpage_request = True
8222d8de
JMF
464 _download_retcode = None
465 _num_downloads = None
30a074c2 466 _playlist_level = 0
467 _playlist_urls = set()
8222d8de
JMF
468 _screen_file = None
469
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:     Dictionary of options (see the class docstring). Its
                entries are copied into self.params on top of the
                built-in defaults. None is treated as an empty dict.
    auto_init:  If true, print the debug header and register the
                default info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Select by truthiness rather than indexing a two-element list with the
    # raw option value: indexing raised TypeError when the option was None
    # (or any other non-integer value)
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated `param` is set; return whether it was set."""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the option being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv; fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified.

        Adds every stripped line of the file `fn` to self.archive.
        Returns False if `fn` is None or the file does not exist,
        True otherwise; other IOErrors are re-raised.
        """
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so the caller's dictionary is not mutated by pop()
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
603
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short video ID starting with a dash.

    An argument is flagged when it is a '-' followed by exactly ten
    characters from [0-9A-Za-z_-]. If any are found, a warning is
    reported showing the command line with those arguments moved
    after a '--' separator.
    """
    dashed_id_pattern = r'^-[0-9A-Za-z_-]{10}$'
    ordinary_args, dashed_ids = [], []
    for arg in argv:
        if re.match(dashed_id_pattern, arg):
            dashed_ids.append(arg)
        else:
            ordinary_args.append(arg)

    if not dashed_ids:
        return

    suggested_argv = ['yt-dlp'] + ordinary_args + ['--'] + dashed_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggested_argv))
619
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Instances are additionally indexed by their ie_key() and told which
    downloader owns them; classes are only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # A bare class: nothing to index or bind yet
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 626
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If no instance is registered yet, one is created from the class
    looked up by ie_key, added to the extractor list and returned.
    """
    registered = self._ies_instances.get(ie_key)
    if registered is not None:
        return registered
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
638
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes(),
    in the order it yields them.
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
645
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for the stage named by `when`.

    `when` must be one of the keys of self._pps; the postprocessor is
    then told which downloader owns it.
    """
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
650
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks."""
    registered_hooks = self._post_hooks
    registered_hooks.append(ph)
654
933605d7
JMF
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of progress hooks (currently only for the file downloader)"""
    registered_hooks = self._progress_hooks
    registered_hooks.append(ph)
8ab470f1 658
1c088fa8 659 def _bidi_workaround(self, message):
5d681e96 660 if not hasattr(self, '_output_channel'):
1c088fa8
PH
661 return message
662
5d681e96 663 assert hasattr(self, '_output_process')
11b85ce6 664 assert isinstance(message, compat_str)
6febd1c1
PH
665 line_count = message.count('\n') + 1
666 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 667 self._output_process.stdin.flush()
6febd1c1 668 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 669 for _ in range(line_count))
6febd1c1 670 return res[:-len('\n')]
1c088fa8 671
b35496d8 672 def _write_string(self, message, out=None, only_once=False):
673 if only_once:
674 if message in self._printed_messages:
675 return
676 self._printed_messages.add(message)
677 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 678
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' param is set, the message goes to logger.debug().
    Otherwise quiet messages are dropped unless 'verbose' is set, in
    which case they are written to the error file instead of the
    screen file. A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    destination = self._err_file if quiet else self._screen_file
    self._write_string(text, destination)
8222d8de 687
def to_stderr(self, message, only_once=False):
    """Print message to the error file, or hand it to the logger.

    If a 'logger' param is set, the message goes to logger.error();
    otherwise it is written, newline-terminated, to the error file.
    only_once is forwarded to _write_string and has no effect on the
    logger path.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = '%s\n' % self._bidi_workaround(message)
    self._write_string(text, self._err_file, only_once=only_once)
8222d8de 695
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to `message` if 'consoletitle' is enabled.

    On Windows ('nt') the title is set through kernel32 when a console
    window exists; elsewhere an escape sequence is written to the screen
    file, but only if the TERM environment variable is present.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 706
bdde425c
PH
def save_console_title(self):
    """Push the current terminal title onto the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off, the
    OS is not 'nt' and TERM is present in the environment.
    """
    wanted = self.params.get('consoletitle', False) and not self.params.get('simulate')
    if not wanted:
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
715
def restore_console_title(self):
    """Pop the terminal title previously pushed on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off, the
    OS is not 'nt' and TERM is present in the environment.
    """
    wanted = self.params.get('consoletitle', False) and not self.params.get('simulate')
    if not wanted:
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
724
725 def __enter__(self):
726 self.save_console_title()
727 return self
728
729 def __exit__(self, *args):
730 self.restore_console_title()
f89197d7 731
dca08720 732 if self.params.get('cookiefile') is not None:
1bab3437 733 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 734
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well: `tb` when given, otherwise one built from
    the exception currently being handled, or from the current call stack
    when no exception is active.

    Unless 'ignoreerrors' is set, DownloadError is then raised carrying the
    message and the relevant exc_info; with 'ignoreerrors' the download
    return code is set to 1 instead.
    """
    def _wrapped_exc_info():
        # exc_info attached to the exception being handled, or None
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                wrapped = _wrapped_exc_info()
                if wrapped:
                    pieces.append(''.join(traceback.format_exception(*wrapped)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    # Prefer the exc_info carried by the active exception over the active one
    exc_info = _wrapped_exc_info() or sys.exc_info()
    raise DownloadError(message, exc_info)
765
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    is_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=is_quiet)
770
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    A configured 'logger' receives the bare message via warning() instead.
    Without a logger, nothing is printed when 'no_warnings' is set.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colored = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    header = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (header, message), only_once)
8222d8de
JMF
787
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    colored = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
799
def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr

    Only active in verbose mode. The message is prefixed with '[debug] '
    and sent to the configured logger's debug(), or to stderr when there
    is no logger.
    '''
    if self.params.get('verbose', False):
        line = '[debug] %s' % message
        logger = self.params.get('logger')
        if logger:
            logger.debug(line)
        else:
            self.to_stderr(line, only_once)
0760b0a7 809
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not include the file name
        self.to_screen('[download] The file has already been downloaded')
8222d8de 816
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not include the file name
        self.to_screen('Deleting existing file')
0c3d0f51 823
def parse_outtmpl(self):
    """Return the 'outtmpl' parameter as a dict of templates.

    A non-dict value becomes the 'default' entry. Entries that are missing
    or falsy are filled in from DEFAULT_OUTTMPL. When 'outtmpl' is already
    a dict it is updated in place and returned. A warning is reported for
    every template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    fallbacks = {
        name: tmpl for name, tmpl in DEFAULT_OUTTMPL.items()
        if not templates.get(name)}
    templates.update(fallbacks)
    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
837
def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    Both directory components come from the 'paths' parameter (a dict) and
    are stripped and passed through expand_path(). The joined path is
    returned after sanitize_path().
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
852
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path() to a template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random 32-letter marker is
    # wedged between the doubled characters and removed afterwards.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', '%' + marker + '%')
    protected = protected.replace('$$', '$' + marker + '$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')
867
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def _escape(mobj):
        # Matches without a key get an extra leading '%'; keyed ones stay as they are
        matched = mobj.group(0)
        if mobj.group('has_key'):
            return matched
        return '%' + matched

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
875
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def _to_str_conversion(mobj):
        # Swap the trailing l/j/q conversion character for a plain 's'
        return mobj.group(0)[:-1] + 's'

    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]')
    normalized = re.sub(custom_types_re, _to_str_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; only a ValueError counts as an invalid template
        cls.escape_outtmpl(normalized) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
888
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution : ydl.escape_outtmpl(outtmpl) % info_dict

    Returns a tuple (template, values): every keyed field in `outtmpl` is
    rewritten to use a unique key, and `values` maps those keys to the
    computed values.

    info_dict gets an 'epoch' entry if it has none; all other changes are
    made on a shallow copy. `sanitize`, if given, is called as
    sanitize(field_name, value) on values formatted with c, s or r.
    """
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
    for key in ('__original_infodict', '__postprocessors'):
        info_dict.pop(key, None)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The alternation is wrapped in a group so that it stays one operand when
    # embedded after the operator below, and the decimal point is escaped so
    # it only matches a literal '.'
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # Alternates between reading an operator and reading its operand
            math_func = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_func else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_func is None:
                    math_func = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a literal number: treat the operand as another field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_func(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_func = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return f'%{outer_mobj.group(0)}'

        prefix = outer_mobj.group('prefix')
        key = outer_mobj.group('key')
        original_fmt = fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default, mobj = None, na, {'fields': ''}
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':
            value, fmt = ', '.join(variadic(value)), str_fmt
        elif fmt[-1] == 'j':
            value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
        elif fmt[-1] == 'q':
            value, fmt = compat_shlex_quote(str(value)), str_fmt
        elif fmt[-1] == 'c':
            # First character of the value. An empty value has no first
            # character, so it is emitted as an empty string instead of
            # raising IndexError
            text = str(value)
            if text:
                value = text[0]
            else:
                value, fmt = text, str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitize(mobj['fields'].split('.')[-1], value)

        # Key the value by field expression plus original conversion so the
        # same field used with different conversions gets separate entries
        key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
        TMPL_DICT[key] = value
        return f'{prefix}%({key}){fmt}'

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1029
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Render the output template of `tmpl_type` for `info_dict`.

    Falls back to the 'default' template when `tmpl_type` has none.
    Returns the rendered filename, or None after reporting an error when
    the template substitution raises ValueError.
    """
    try:
        def sanitize(k, v):
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))

        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
        outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
        filename = outtmpl % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing extensions. Without the limit,
            # any further dot-separated parts were silently dropped and a
            # name without a dot was treated as its own extension.
            stem, *extensions = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [stem[:trim_file_name], *extensions]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1059
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The name is rendered from the template for `dir_type` (or 'default')
    and then placed under the configured output paths. '-' and empty or
    None results are returned as they are. With `warn` set, a one-time
    warning is issued when the 'paths' parameter cannot take effect.
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # _prepare_filename returns None when the template is invalid;
        # os.path.isabs(None) would raise TypeError, so guard against it
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
0202b52a 1077
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason for skipping it. The reason is printed
    unless `silent`. ExistingVideoReached / RejectedVideoReached is raised
    instead when the matching 'break_on_existing' / 'break_on_reject'
    parameter is set. With `incomplete`, the 'match_filter' callable is
    not consulted.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            wanted_re = self.params.get('matchtitle', False)
            if wanted_re and not re.search(wanted_re, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted_re + '"'
            unwanted_re = self.params.get('rejecttitle', False)
            if unwanted_re and re.search(unwanted_re, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted_re + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and views < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and views > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        if incomplete:
            return None
        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        # The callable's own return value (None = accept) is the verdict
        return match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1131
b6c45014
JMF
1132 @staticmethod
1133 def add_extra_info(info_dict, extra_info):
1134 '''Set the keys from extra_info in info dict if they are missing'''
1135 for key, value in extra_info.items():
1136 info_dict.setdefault(key, value)
1137
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information for a URL using the first suitable extractor.

    Returns the extraction result, or None when the URL is already recorded
    in the download archive, when no extractor is suitable (an error is
    reported), or when the extraction itself yields nothing.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
                  (a fresh empty dict is used when omitted)
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """
    # A shared `{}` default would leak keys between calls, because the
    # dict is handed on to code that may add entries to it
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Best-effort ID lookup so archived videos can be skipped before extraction
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        # Only reached when the loop finished without finding a suitable extractor
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1186
def __handle_extraction_exceptions(func, handle_all_errors=True):
    """Decorator turning extraction failures of `func` into reported errors.

    GeoRestrictedError and ExtractorError are reported via report_error().
    ThrottledDownload triggers a warning and another attempt.
    MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached
    always propagate. Any other exception is reported only when
    `handle_all_errors` and the 'ignoreerrors' parameter are both set,
    and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as geo_err:
            parts = [geo_err.msg]
            if geo_err.countries:
                parts.append('This video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, geo_err.countries)))
            parts.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error('\n'.join(parts))
        except ExtractorError as extractor_err:  # An error we somewhat expected
            self.report_error(compat_str(extractor_err), extractor_err.format_traceback())
        except ThrottledDownload:
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            # Run the whole wrapped call again
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as err:
            if not (handle_all_errors and self.params.get('ignoreerrors', False)):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1212
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process the result.

    Returns None when the extractor returns None, the raw (annotated)
    result when `process` is false, and the outcome of process_ie_result()
    otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1231
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL and extractor fields of `ie_result` where they are missing.

    `url` provides 'webpage_url', 'original_url' and 'webpage_url_basename';
    `ie` provides 'extractor' and 'extractor_key'. Either may be None, in
    which case its fields are left alone.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1244
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    extra_info holds extra values to add to the results; a fresh empty dict
    is used when it is omitted. A dict passed by the caller may gain an
    'original_url' entry. Raises Exception for an unknown '_type'.
    """
    # A shared `{}` default would be polluted by the setdefault() below
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print what is known and do not follow the URL
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must go by keyword: the third positional
            # parameter of extract_info is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesised so the id is used when there is no title;
            # `%` binds tighter than `or`
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1368
def _ensure_dir_exists(self, path):
    """Call make_dir() for `path`, passing report_error as its error callback, and return its result."""
    on_error = self.report_error
    return make_dir(path, on_error)
1371
30a074c2 1372 def __process_playlist(self, ie_result, download):
1373 # We process each entry in the playlist
1374 playlist = ie_result.get('title') or ie_result.get('id')
1375 self.to_screen('[download] Downloading playlist: %s' % playlist)
1376
498f5606 1377 if 'entries' not in ie_result:
1378 raise EntryNotInPlaylist()
1379 incomplete_entries = bool(ie_result.get('requested_entries'))
1380 if incomplete_entries:
1381 def fill_missing_entries(entries, indexes):
1382 ret = [None] * max(*indexes)
1383 for i, entry in zip(indexes, entries):
1384 ret[i - 1] = entry
1385 return ret
1386 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1387
30a074c2 1388 playlist_results = []
1389
56a8fb4f 1390 playliststart = self.params.get('playliststart', 1)
30a074c2 1391 playlistend = self.params.get('playlistend')
1392 # For backwards compatibility, interpret -1 as whole list
1393 if playlistend == -1:
1394 playlistend = None
1395
1396 playlistitems_str = self.params.get('playlist_items')
1397 playlistitems = None
1398 if playlistitems_str is not None:
1399 def iter_playlistitems(format):
1400 for string_segment in format.split(','):
1401 if '-' in string_segment:
1402 start, end = string_segment.split('-')
1403 for item in range(int(start), int(end) + 1):
1404 yield int(item)
1405 else:
1406 yield int(string_segment)
1407 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1408
1409 ie_entries = ie_result['entries']
56a8fb4f 1410 msg = (
1411 'Downloading %d videos' if not isinstance(ie_entries, list)
1412 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1413 if not isinstance(ie_entries, (list, PagedList)):
1414 ie_entries = LazyList(ie_entries)
1415
50fed816 1416 def get_entry(i):
1417 return YoutubeDL.__handle_extraction_exceptions(
cc9d1493 1418 lambda self, i: ie_entries[i - 1],
1419 False
50fed816 1420 )(self, i)
1421
56a8fb4f 1422 entries = []
1423 for i in playlistitems or itertools.count(playliststart):
1424 if playlistitems is None and playlistend is not None and playlistend < i:
1425 break
1426 entry = None
1427 try:
50fed816 1428 entry = get_entry(i)
56a8fb4f 1429 if entry is None:
498f5606 1430 raise EntryNotInPlaylist()
56a8fb4f 1431 except (IndexError, EntryNotInPlaylist):
1432 if incomplete_entries:
1433 raise EntryNotInPlaylist()
1434 elif not playlistitems:
1435 break
1436 entries.append(entry)
120fe513 1437 try:
1438 if entry is not None:
1439 self._match_entry(entry, incomplete=True, silent=True)
1440 except (ExistingVideoReached, RejectedVideoReached):
1441 break
56a8fb4f 1442 ie_result['entries'] = entries
30a074c2 1443
56a8fb4f 1444 # Save playlist_index before re-ordering
1445 entries = [
1446 ((playlistitems[i - 1] if playlistitems else i), entry)
1447 for i, entry in enumerate(entries, 1)
1448 if entry is not None]
1449 n_entries = len(entries)
498f5606 1450
498f5606 1451 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1452 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1453 ie_result['requested_entries'] = playlistitems
1454
1455 if self.params.get('allow_playlist_files', True):
1456 ie_copy = {
1457 'playlist': playlist,
1458 'playlist_id': ie_result.get('id'),
1459 'playlist_title': ie_result.get('title'),
1460 'playlist_uploader': ie_result.get('uploader'),
1461 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1462 'playlist_index': 0,
498f5606 1463 }
1464 ie_copy.update(dict(ie_result))
1465
1466 if self.params.get('writeinfojson', False):
1467 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1468 if not self._ensure_dir_exists(encodeFilename(infofn)):
1469 return
1470 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1471 self.to_screen('[info] Playlist metadata is already present')
1472 else:
1473 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1474 try:
8012d892 1475 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
498f5606 1476 except (OSError, IOError):
1477 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1478
681de68e 1479 # TODO: This should be passed to ThumbnailsConvertor if necessary
1480 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1481
498f5606 1482 if self.params.get('writedescription', False):
1483 descfn = self.prepare_filename(ie_copy, 'pl_description')
1484 if not self._ensure_dir_exists(encodeFilename(descfn)):
1485 return
1486 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1487 self.to_screen('[info] Playlist description is already present')
1488 elif ie_result.get('description') is None:
1489 self.report_warning('There\'s no playlist description to write.')
1490 else:
1491 try:
1492 self.to_screen('[info] Writing playlist description to: ' + descfn)
1493 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1494 descfile.write(ie_result['description'])
1495 except (OSError, IOError):
1496 self.report_error('Cannot write playlist description file ' + descfn)
1497 return
30a074c2 1498
1499 if self.params.get('playlistreverse', False):
1500 entries = entries[::-1]
30a074c2 1501 if self.params.get('playlistrandom', False):
1502 random.shuffle(entries)
1503
1504 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1505
56a8fb4f 1506 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1507 failures = 0
1508 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1509 for i, entry_tuple in enumerate(entries, 1):
1510 playlist_index, entry = entry_tuple
53ed7066 1511 if 'playlist_index' in self.params.get('compat_options', []):
1512 playlist_index = playlistitems[i - 1] if playlistitems else i
30a074c2 1513 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1514 # This __x_forwarded_for_ip thing is a bit ugly but requires
1515 # minimal changes
1516 if x_forwarded_for:
1517 entry['__x_forwarded_for_ip'] = x_forwarded_for
1518 extra = {
1519 'n_entries': n_entries,
f59ae581 1520 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1521 'playlist_index': playlist_index,
1522 'playlist_autonumber': i,
30a074c2 1523 'playlist': playlist,
1524 'playlist_id': ie_result.get('id'),
1525 'playlist_title': ie_result.get('title'),
1526 'playlist_uploader': ie_result.get('uploader'),
1527 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1528 'extractor': ie_result['extractor'],
1529 'webpage_url': ie_result['webpage_url'],
1530 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1531 'extractor_key': ie_result['extractor_key'],
1532 }
1533
1534 if self._match_entry(entry, incomplete=True) is not None:
1535 continue
1536
1537 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1538 if not entry_result:
1539 failures += 1
1540 if failures >= max_failures:
1541 self.report_error(
1542 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1543 break
30a074c2 1544 # TODO: skip failed (empty) entries?
1545 playlist_results.append(entry_result)
1546 ie_result['entries'] = playlist_results
1547 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1548 return ie_result
1549
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run a single playlist entry through ``process_ie_result``.

    ``extra_info`` is forwarded unchanged as the ``extra_info`` keyword.
    The call is wrapped by the ``__handle_extraction_exceptions`` decorator
    (defined elsewhere in this class), so what is returned when the
    processing raises is decided by that decorator, not here.
    """
    processed_entry = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed_entry
1554
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """Return a predicate that checks a format dict against filter_spec.

    Two kinds of specification are understood:

    * numeric comparisons (``<``, ``<=``, ``>``, ``>=``, ``=``, ``!=``) on
      width, height, tbr, abr, vbr, asr, filesize, filesize_approx and fps.
      The value is either an integer or anything ``parse_filesize`` accepts
      (with or without a trailing ``B``);
    * string comparisons (``=``, ``^=``, ``$=``, ``*=``, each optionally
      negated with a leading ``!``) on any other key.

    A ``?`` right after the operator makes the predicate truthy for formats
    in which the key is missing or None; without it the predicate returns
    None for such formats.

    Raises ValueError when a numeric value cannot be parsed and SyntaxError
    when filter_spec matches neither grammar.
    """
    NUMERIC_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(re.escape(symbol) for symbol in NUMERIC_OPERATORS))

    m = numeric_rex.fullmatch(filter_spec)
    if m:
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, then once more
            # with an explicit byte suffix (e.g. "10M" -> "10MB")
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = NUMERIC_OPERATORS[m.group('op')]
    else:
        STRING_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[a-zA-Z0-9._-]+)\s*
            ''' % '|'.join(re.escape(symbol) for symbol in STRING_OPERATORS))

        m = string_rex.fullmatch(filter_spec)
        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        comparison_value = m.group('value')
        str_op = STRING_OPERATORS[m.group('op')]
        if m.group('negation'):
            def op(attr, value):
                return not str_op(attr, value)
        else:
            op = str_op

    # Pull the pieces the predicate needs out of the match object once
    key = m.group('key')
    none_inclusive = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            return none_inclusive
        return op(actual_value, comparison_value)

    return _filter
1615
def _default_format_spec(self, info_dict, download=True):
    """Return the format specification used when none was requested.

    'best/bestvideo+bestaudio' is returned when an actual download is going
    to happen (the 'simulate' param is falsy and download is truthy) and
    either the ffmpeg merger is unusable, the video is live, or the default
    output template is '-'.
    Otherwise 'bestvideo+bestaudio/best' is returned if multiple audio
    streams are allowed or the 'format-spec' compat option is set, and
    'bestvideo*+bestaudio/best' in every remaining case.
    """

    def merger_usable():
        ffmpeg_merger = FFmpegMergerPP(self)
        return ffmpeg_merger.available and ffmpeg_merger.can_merge()

    if self.params.get('simulate') or not download:
        # Nothing is going to be written, so merging is never a concern
        prefer_best = False
    else:
        prefer_best = (
            not merger_usable()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'

    old_style_spec = (
        self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params.get('compat_opts', []))
    if old_style_spec:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
0017d9ad 1638
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized and parsed into a tree of
    ``FormatSelector`` nodes:

    * ``a,b``   - select both ``a`` and ``b``
    * ``a/b``   - pick the first alternative that yields any format
    * ``a+b``   - merge the formats selected by ``a`` and ``b``
    * ``(...)`` - grouping
    * ``[...]`` - a filter handed to ``self._build_format_filter``

    Returns a function taking a context dict (with at least the keys
    'formats' and 'incomplete_formats') and returning an iterable of the
    selected format dicts.

    Raises SyntaxError when format_spec cannot be parsed.
    """

    def syntax_error(note, start):
        # Build (but do not raise) a SyntaxError that points at the
        # offending column of format_spec
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return them joined
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; returns a list of FormatSelector nodes.
        # The inside_* flags tell a nested call which operators belong to
        # its caller and must therefore be pushed back before returning.
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two formats (each possibly already a merged format
        # carrying 'requested_formats') into a single format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Collect the survivors in a new list instead of popping from
            # the list being enumerated: removing in place shifts the next
            # element into the current index, so it would never be checked
            # and surplus streams could slip through.
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    # No disallowed duplicate stream was found in this format
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Yield the given formats; when the 'check_formats' param is set,
        # only those for which a test download into a temp file succeeds
        if not check_formats:
            yield from formats
            return
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            temp_file = tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        # Turn a parsed selector (a FormatSelector node or a list of them)
        # into a function mapping a context dict to selected formats
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the node's own filters on a copy so that sibling
            # selectors still see the unfiltered format list
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token, which
        # nested parser calls use to hand an operator back to their caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1969
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the given info/format dict.

    Starts from a copy of ``std_headers``, overlays the dict's own
    'http_headers', adds a 'Cookie' header when ``_calc_cookies`` yields
    one, and finally adds 'X-Forwarded-For' from '__x_forwarded_for_ip'
    unless that header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        # setdefault keeps an explicitly supplied X-Forwarded-For untouched
        headers.setdefault('X-Forwarded-For', forwarded_ip)

    return headers
1987
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header value the cookie jar adds for info_dict['url'].

    A throwaway request for the URL is built only so that the cookie jar
    can attach its cookie header to it; the request is never sent here.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1992
def _sanitize_thumbnails(self, info_dict):
    """Normalize info_dict['thumbnails'] in place.

    When there is no 'thumbnails' list but a single 'thumbnail' URL, a
    one-element list is created from it. The list is then sorted by
    preference, width, height, id and url; every entry gets an 'id', a
    'resolution' (when both dimensions are known) and a sanitized 'url'.

    Unless the 'check_formats' param is exactly False, the stored list is a
    lazily evaluated one from which thumbnails that fail a HEAD request
    are dropped. All thumbnails are tested when 'check_formats' is truthy,
    otherwise only those flagged with '_test_url'.

    Does nothing when there are no thumbnails at all.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single_url = info_dict.get('thumbnail')
        if single_url:
            thumbnails = [{'url': single_url}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def field_or(thumb, name, fallback):
        value = thumb.get(name)
        return fallback if value is None else value

    def sort_key(thumb):
        return (
            field_or(thumb, 'preference', -1),
            field_or(thumb, 'width', -1),
            field_or(thumb, 'height', -1),
            field_or(thumb, 'id', ''),
            thumb.get('url'))

    thumbnails.sort(key=sort_key)

    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is False:
        info_dict['thumbnails'] = thumbnails
        return

    if self.params.get('check_formats'):
        test_all = True
        report = lambda msg: self.to_screen(f'[info] {msg}')
    else:
        test_all = False
        report = self.write_debug

    def is_reachable(thumb):
        if not (test_all or thumb.get('_test_url')):
            return True
        report('Testing thumbnail %s' % thumb['id'])
        try:
            self.urlopen(HEADRequest(thumb['url']))
        except network_exceptions as err:
            report('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                thumb['id'], thumb['url'], error_to_compat_str(err)))
            return False
        return True

    # Test from the last (highest sorted) thumbnail backwards, then restore
    # the original order on the lazy list
    info_dict['thumbnails'] = LazyList(filter(is_reachable, thumbnails[::-1])).reverse()
bc516a3f 2040
dd82ffea
JMF
2041 def process_video_result(self, info_dict, download=True):
2042 assert info_dict.get('_type', 'video') == 'video'
2043
bec1fad2
PH
2044 if 'id' not in info_dict:
2045 raise ExtractorError('Missing "id" field in extractor result')
2046 if 'title' not in info_dict:
2047 raise ExtractorError('Missing "title" field in extractor result')
2048
c9969434
S
2049 def report_force_conversion(field, field_not, conversion):
2050 self.report_warning(
2051 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2052 % (field, field_not, conversion))
2053
2054 def sanitize_string_field(info, string_field):
2055 field = info.get(string_field)
2056 if field is None or isinstance(field, compat_str):
2057 return
2058 report_force_conversion(string_field, 'a string', 'string')
2059 info[string_field] = compat_str(field)
2060
2061 def sanitize_numeric_fields(info):
2062 for numeric_field in self._NUMERIC_FIELDS:
2063 field = info.get(numeric_field)
2064 if field is None or isinstance(field, compat_numeric_types):
2065 continue
2066 report_force_conversion(numeric_field, 'numeric', 'int')
2067 info[numeric_field] = int_or_none(field)
2068
2069 sanitize_string_field(info_dict, 'id')
2070 sanitize_numeric_fields(info_dict)
be6217b2 2071
dd82ffea
JMF
2072 if 'playlist' not in info_dict:
2073 # It isn't part of a playlist
2074 info_dict['playlist'] = None
2075 info_dict['playlist_index'] = None
2076
bc516a3f 2077 self._sanitize_thumbnails(info_dict)
d5519808 2078
536a55da 2079 thumbnail = info_dict.get('thumbnail')
bc516a3f 2080 thumbnails = info_dict.get('thumbnails')
536a55da
S
2081 if thumbnail:
2082 info_dict['thumbnail'] = sanitize_url(thumbnail)
2083 elif thumbnails:
d5519808
PH
2084 info_dict['thumbnail'] = thumbnails[-1]['url']
2085
ae30b840 2086 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2087 info_dict['display_id'] = info_dict['id']
2088
10db0d2f 2089 for ts_key, date_key in (
2090 ('timestamp', 'upload_date'),
2091 ('release_timestamp', 'release_date'),
2092 ):
2093 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2094 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2095 # see http://bugs.python.org/issue1646728)
2096 try:
2097 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2098 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2099 except (ValueError, OverflowError, OSError):
2100 pass
9d2ecdbc 2101
ae30b840 2102 live_keys = ('is_live', 'was_live')
2103 live_status = info_dict.get('live_status')
2104 if live_status is None:
2105 for key in live_keys:
2106 if info_dict.get(key) is False:
2107 continue
2108 if info_dict.get(key):
2109 live_status = key
2110 break
2111 if all(info_dict.get(key) is False for key in live_keys):
2112 live_status = 'not_live'
2113 if live_status:
2114 info_dict['live_status'] = live_status
2115 for key in live_keys:
2116 if info_dict.get(key) is None:
2117 info_dict[key] = (live_status == key)
2118
33d2fc2f
S
2119 # Auto generate title fields corresponding to the *_number fields when missing
2120 # in order to always have clean titles. This is very common for TV series.
2121 for field in ('chapter', 'season', 'episode'):
2122 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2123 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2124
05108a49
S
2125 for cc_kind in ('subtitles', 'automatic_captions'):
2126 cc = info_dict.get(cc_kind)
2127 if cc:
2128 for _, subtitle in cc.items():
2129 for subtitle_format in subtitle:
2130 if subtitle_format.get('url'):
2131 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2132 if subtitle_format.get('ext') is None:
2133 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2134
2135 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2136 subtitles = info_dict.get('subtitles')
4bba3716 2137
360e1ca5 2138 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2139 info_dict['id'], subtitles, automatic_captions)
a504ced0 2140
dd82ffea
JMF
2141 # We now pick which formats have to be downloaded
2142 if info_dict.get('formats') is None:
2143 # There's only one format available
2144 formats = [info_dict]
2145 else:
2146 formats = info_dict['formats']
2147
db95dc13 2148 if not formats:
b7da73eb 2149 if not self.params.get('ignore_no_formats_error'):
2150 raise ExtractorError('No video formats found!')
2151 else:
2152 self.report_warning('No video formats found!')
db95dc13 2153
73af5cc8
S
2154 def is_wellformed(f):
2155 url = f.get('url')
a5ac0c47 2156 if not url:
73af5cc8
S
2157 self.report_warning(
2158 '"url" field is missing or empty - skipping format, '
2159 'there is an error in extractor')
a5ac0c47
S
2160 return False
2161 if isinstance(url, bytes):
2162 sanitize_string_field(f, 'url')
2163 return True
73af5cc8
S
2164
2165 # Filter out malformed formats for better extraction robustness
2166 formats = list(filter(is_wellformed, formats))
2167
181c7053
S
2168 formats_dict = {}
2169
dd82ffea 2170 # We check that all the formats have the format and format_id fields
db95dc13 2171 for i, format in enumerate(formats):
c9969434
S
2172 sanitize_string_field(format, 'format_id')
2173 sanitize_numeric_fields(format)
dcf77cf1 2174 format['url'] = sanitize_url(format['url'])
e74e3b63 2175 if not format.get('format_id'):
8016c922 2176 format['format_id'] = compat_str(i)
e2effb08
S
2177 else:
2178 # Sanitize format_id from characters used in format selector expression
ec85ded8 2179 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2180 format_id = format['format_id']
2181 if format_id not in formats_dict:
2182 formats_dict[format_id] = []
2183 formats_dict[format_id].append(format)
2184
2185 # Make sure all formats have unique format_id
2186 for format_id, ambiguous_formats in formats_dict.items():
2187 if len(ambiguous_formats) > 1:
2188 for i, format in enumerate(ambiguous_formats):
2189 format['format_id'] = '%s-%d' % (format_id, i)
2190
2191 for i, format in enumerate(formats):
8c51aa65 2192 if format.get('format') is None:
6febd1c1 2193 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2194 id=format['format_id'],
2195 res=self.format_resolution(format),
6febd1c1 2196 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 2197 )
c1002e96 2198 # Automatically determine file extension if missing
5b1d8575 2199 if format.get('ext') is None:
cce929ea 2200 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2201 # Automatically determine protocol if missing (useful for format
2202 # selection purposes)
6f0be937 2203 if format.get('protocol') is None:
b5559424 2204 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2205 # Add HTTP headers, so that external programs can use them from the
2206 # json output
2207 full_format_info = info_dict.copy()
2208 full_format_info.update(format)
2209 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2210 # Remove private housekeeping stuff
2211 if '__x_forwarded_for_ip' in info_dict:
2212 del info_dict['__x_forwarded_for_ip']
dd82ffea 2213
4bcc7bd1 2214 # TODO Central sorting goes here
99e206d5 2215
b7da73eb 2216 if formats and formats[0] is not info_dict:
b3d9ef88
JMF
2217 # only set the 'formats' fields if the original info_dict list them
2218 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2219 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2220 # which can't be exported to json
b3d9ef88 2221 info_dict['formats'] = formats
4ec82a72 2222
2223 info_dict, _ = self.pre_process(info_dict)
2224
b7b04c78 2225 if self.params.get('list_thumbnails'):
2226 self.list_thumbnails(info_dict)
2227 if self.params.get('listformats'):
2228 if not info_dict.get('formats'):
2229 raise ExtractorError('No video formats found', expected=True)
2230 self.list_formats(info_dict)
2231 if self.params.get('listsubtitles'):
2232 if 'automatic_captions' in info_dict:
2233 self.list_subtitles(
2234 info_dict['id'], automatic_captions, 'automatic captions')
2235 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2236 list_only = self.params.get('simulate') is None and (
2237 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2238 if list_only:
b7b04c78 2239 # Without this printing, -F --print-json will not work
169dbde9 2240 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2241 return
2242
187986a8 2243 format_selector = self.format_selector
2244 if format_selector is None:
0017d9ad 2245 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2246 self.write_debug('Default format spec: %s' % req_format)
187986a8 2247 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2248
2249 # While in format selection we may need to have an access to the original
2250 # format set in order to calculate some metrics or do some processing.
2251 # For now we need to be able to guess whether original formats provided
2252 # by extractor are incomplete or not (i.e. whether extractor provides only
2253 # video-only or audio-only formats) for proper formats selection for
2254 # extractors with such incomplete formats (see
067aa17e 2255 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2256 # Since formats may be filtered during format selection and may not match
2257 # the original formats the results may be incorrect. Thus original formats
2258 # or pre-calculated metrics should be passed to format selection routines
2259 # as well.
2260 # We will pass a context object containing all necessary additional data
2261 # instead of just formats.
2262 # This fixes incorrect format selection issue (see
067aa17e 2263 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2264 incomplete_formats = (
317f7ab6 2265 # All formats are video-only or
3089bc74 2266 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2267 # all formats are audio-only
3089bc74 2268 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2269
2270 ctx = {
2271 'formats': formats,
2272 'incomplete_formats': incomplete_formats,
2273 }
2274
2275 formats_to_download = list(format_selector(ctx))
dd82ffea 2276 if not formats_to_download:
b7da73eb 2277 if not self.params.get('ignore_no_formats_error'):
2278 raise ExtractorError('Requested format is not available', expected=True)
2279 else:
2280 self.report_warning('Requested format is not available')
4513a41a
A
2281 # Process what we can, even without any available formats.
2282 self.process_info(dict(info_dict))
b7da73eb 2283 elif download:
2284 self.to_screen(
07cce701 2285 '[info] %s: Downloading %d format(s): %s' % (
2286 info_dict['id'], len(formats_to_download),
2287 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2288 for fmt in formats_to_download:
dd82ffea 2289 new_info = dict(info_dict)
4ec82a72 2290 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2291 new_info['__original_infodict'] = info_dict
b7da73eb 2292 new_info.update(fmt)
dd82ffea
JMF
2293 self.process_info(new_info)
2294 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2295 if formats_to_download:
2296 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2297 return info_dict
2298
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    video_id is only used in warning messages.
    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each one is read through its 'ext' key).

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    """
    want_subs = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    # Regular subtitles take precedence over automatic captions
    candidates = {}
    if normal_subtitles and want_subs:
        candidates.update(normal_subtitles)
    if automatic_captions and want_auto:
        for code, captions in automatic_captions.items():
            candidates.setdefault(code, captions)

    if not candidates or not (want_subs or want_auto):
        return None

    known_langs = candidates.keys()
    lang_specs = self.params.get('subtitleslangs', False)
    if self.params.get('allsubtitles', False):
        chosen_langs = known_langs
    elif lang_specs:
        # Every entry is either 'all' or a regex matched against the
        # language code; a leading '-' removes the matches instead
        chosen_langs = set()
        for spec in lang_specs:
            if spec == 'all':
                chosen_langs.update(known_langs)
                continue
            negate = spec[0] == '-'
            pattern = re.compile((spec[1:] if negate else spec) + '$')
            hits = [code for code in known_langs if pattern.match(code)]
            if negate:
                chosen_langs.difference_update(hits)
            else:
                chosen_langs.update(hits)
    elif 'en' in candidates:
        chosen_langs = ['en']
    else:
        chosen_langs = [next(iter(known_langs))]
    self.write_debug('Downloading subtitles: %s' % ', '.join(chosen_langs))

    fmt_query = self.params.get('subtitlesformat', 'best')
    preferred_exts = fmt_query.split('/') if fmt_query else []

    def pick_track(tracks):
        # Returns (track, matched): the first preference that can be
        # satisfied wins, otherwise fall back to the last listed track
        for ext in preferred_exts:
            if ext == 'best':
                return tracks[-1], True
            same_ext = [track for track in tracks if track['ext'] == ext]
            if same_ext:
                return same_ext[-1], True
        return tracks[-1], False

    selected = {}
    for code in chosen_langs:
        tracks = candidates.get(code)
        if tracks is None:
            self.report_warning('%s subtitles not available for %s' % (code, video_id))
            continue
        track, matched = pick_track(tracks)
        if not matched:
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (fmt_query, code, track['ext']))
        selected[code] = track
    return selected
2361
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout every field requested through the 'force*' params

    Works on a shallow copy of info_dict, so the caller's dict does not
    gain the helper keys 'filename' and 'urls' that are added here.
    When incomplete is true, a missing mandatory field is skipped
    instead of raising KeyError.
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename
    requested = info.get('requested_formats')
    if requested is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(fmt['url'] + fmt.get('play_path', '') for fmt in requested)
    elif 'url' in info:
        info['urls'] = info['url'] + info.get('play_path', '')

    def is_forced(option):
        return self.params.get('force%s' % option, False)

    def emit_required(option, key=None):
        key = option if key is None else key
        if not is_forced(option):
            return
        if incomplete and info.get(key) is None:
            return
        self.to_stdout(info[key])

    def emit_if_present(option):
        if is_forced(option) and info.get(option) is not None:
            self.to_stdout(info[option])

    if self.params.get('forceprint') or self.params.get('forcejson'):
        self.post_extract(info)
    for template in self.params.get('forceprint', []):
        # A bare field name is shorthand for the template '%(name)s'
        if re.match(r'\w+$', template):
            template = '%({})s'.format(template)
        template, template_fields = self.prepare_outtmpl(template, info)
        self.to_stdout(self.escape_outtmpl(template) % template_fields)

    emit_required('title')
    emit_required('id')
    emit_required('url', 'urls')
    emit_if_present('thumbnail')
    emit_if_present('description')
    emit_if_present('filename')
    if self.params.get('forceduration', False) and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    emit_required('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info)))
d06daf23 2404
def dl(self, name, info, subtitle=False, test=False):
    """Download the media described by info to the file called name

    A name of '-' asks the downloader to write to stdout. With test=True
    a fixed set of test parameters replaces self.params and no progress
    hooks are attached. Returns whatever the downloader's download()
    returns.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

    format_urls = [f['url'] for f in info.get('requested_formats', [])]
    if not format_urls:
        format_urls = [info['url']]
    self.write_debug('Invoking downloader on "%s"' % '", "'.join(format_urls))

    # The caller's dict is left untouched; headers are only filled in on the copy
    download_info = dict(info)
    if download_info.get('http_headers') is None:
        download_info['http_headers'] = self._calc_headers(download_info)
    return fd.download(name, download_info, subtitle)
2432
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Steps, in order: enforce the 'max_downloads' limit, apply the entry
    filter, compute the output filenames, do the forced printings, write
    the requested side files (description, annotations, subtitles, info
    json, thumbnails, internet shortcuts), run the 'before_dl'
    preprocessors, download (and, for several requested formats, arrange
    the merge), queue the fixup postprocessors, run the postprocessors
    and post hooks, and finally record the download archive entry.

    info_dict is modified in place (e.g. 'fulltitle', 'format',
    '_filename' and several '__*' bookkeeping keys are set).

    Returns None. Most failures are reported through report_error and end
    the processing early. Raises MaxDownloadsReached when the download
    limit is hit and UnavailableVideoError when the download fails with
    an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # Any non-None result from the filter means the entry is skipped
    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    # Maps a file written next to the temporary filename to its final name
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        # A subtitle that fails to download is only a warning
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        # 'writelink' picks the shortcut type that is native to the platform
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Returns False only when writing the shortcut failed
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # An existing shortcut is kept only when overwriting is disabled,
        # like the other side files written above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        # Nothing is downloaded, but the side files still have to be moved
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                # Returns the first already present file that should be
                # reused, or None when a download is needed. Present files
                # are deleted first when 'overwrites' is enabled.
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    # Swap a trailing old/new extension for ext; any other
                    # suffix is kept and ext is appended after it
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:  # All requested formats have same protocol
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = FFmpegFD.can_merge_formats(info_dict)
                if dl_filename is not None:
                    pass
                elif (directly_mergable and get_suitable_downloader(
                        info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
                    # The ffmpeg downloader receives all the URLs at once
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if directly_mergable
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            # Each format goes to its own 'f<format_id>' file
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue (or only warn about) the ffmpeg based fixups,
                # depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2849
def download(self, url_list):
    """Download a given list of URLs.

    Returns self._download_retcode. Raises SameFileError when several
    URLs would be written to one fixed output filename; the exceptions
    that signal an early stop are announced on screen and re-raised.
    """
    outtmpl = self.outtmpl_dict['default']
    fixed_filename = outtmpl != '-' and '%' not in outtmpl
    if len(url_list) > 1 and fixed_filename and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            force_generic = self.params.get('force_generic_extractor', False)
            res = self.extract_info(url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        if self.params.get('dump_single_json', False):
            self.post_extract(res)
            self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
2881
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename

    If processing the stored info fails and it has a 'webpage_url', that
    URL is downloaded from scratch instead; otherwise the error is
    re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.sanitize_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2898
cb202fd2 2899 @staticmethod
8012d892 2900 def sanitize_info(info_dict, remove_private_keys=False):
2901 ''' Sanitize the infodict for converting to json '''
6e84b215 2902 info_dict.setdefault('epoch', int(time.time()))
2903 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 2904 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
8012d892 2905 if remove_private_keys:
6e84b215 2906 remove_keys |= {
2907 'requested_formats', 'requested_subtitles', 'requested_entries',
2908 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2909 }
ae8f99e6 2910 empty_values = (None, {}, [], set(), tuple())
2911 reject = lambda k, v: k not in keep_keys and (
2912 k.startswith('_') or k in remove_keys or v in empty_values)
2913 else:
ae8f99e6 2914 reject = lambda k, v: k in remove_keys
5226731e 2915 filter_fn = lambda obj: (
b0249bca 2916 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2917 else obj if not isinstance(obj, dict)
ae8f99e6 2918 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2919 return filter_fn(info_dict)
cb202fd2 2920
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility

    actually_filter is forwarded as sanitize_info's remove_private_keys.
    '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
2925
def run_pp(self, pp, infodict):
    """Run the postprocessor pp and handle the files it wants deleted

    pp.run() must return a (files_to_delete, infodict) pair. With the
    'keepvideo' param those files are registered in '__files_to_move'
    instead of being removed. Returns the info dict produced by pp.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    pending_moves = infodict['__files_to_move']
    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            pending_moves.setdefault(path, '')
        return infodict

    for path in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved afterwards
        pending_moves.pop(path, None)
    return infodict
5bfa4862 2947
277d6ff5 2948 @staticmethod
2949 def post_extract(info_dict):
2950 def actual_post_extract(info_dict):
2951 if info_dict.get('_type') in ('playlist', 'multi_video'):
2952 for video_dict in info_dict.get('entries', {}):
b050d210 2953 actual_post_extract(video_dict or {})
277d6ff5 2954 return
2955
07cce701 2956 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2957 extra = post_extractor().items()
2958 info_dict.update(extra)
07cce701 2959 info_dict.pop('__post_extractor', None)
277d6ff5 2960
4ec82a72 2961 original_infodict = info_dict.get('__original_infodict') or {}
2962 original_infodict.update(extra)
2963 original_infodict.pop('__post_extractor', None)
2964
b050d210 2965 actual_post_extract(info_dict or {})
277d6ff5 2966
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of `ie_info`.

    Returns a `(info, files_to_move)` pair; the '__files_to_move' entry is
    taken out of the returned info dict.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        working = self.run_pp(processor, working)
    pending_moves = working.pop('__files_to_move', None)
    return working, pending_moves
5bfa4862 2973
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the info dict's own '__postprocessors' followed by the
    registered 'post_process' ones, then MoveFilesAfterDownloadPP, and
    finally the 'after_move' ones. Works on a copy of `ie_info` and
    returns the resulting info dict.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    def _run_each(processors, current):
        for processor in processors:
            current = self.run_pp(processor, current)
        return current

    info = _run_each(ie_info.get('__postprocessors', []) + self._pps['post_process'], info)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The move bookkeeping is dropped before the 'after_move' stage runs
    del info['__files_to_move']
    return _run_each(self._pps['after_move'], info)
c1c9a79c 2987
5db07df6 2988 def _make_archive_id(self, info_dict):
e9fef7ee
S
2989 video_id = info_dict.get('id')
2990 if not video_id:
2991 return
5db07df6
PH
2992 # Future-proof against any change in case
2993 # and backwards compatibility with prior versions
e9fef7ee 2994 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2995 if extractor is None:
1211bb6d
S
2996 url = str_or_none(info_dict.get('url'))
2997 if not url:
2998 return
e9fef7ee
S
2999 # Try to find matching extractor for the URL and take its ie_key
3000 for ie in self._ies:
1211bb6d 3001 if ie.suitable(url):
e9fef7ee
S
3002 extractor = ie.ie_key()
3003 break
3004 else:
3005 return
d0757229 3006 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3007
def in_download_archive(self, info_dict):
    """Tell whether the video described by `info_dict` is in the archive.

    Always False when the 'download_archive' param is unset or when no
    archive id can be built for the dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
3018
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and in-memory set.

    Does nothing when the 'download_archive' param is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3028
8c51aa65 3029 @staticmethod
8abeeb94 3030 def format_resolution(format, default='unknown'):
fb04e403 3031 if format.get('vcodec') == 'none':
8326b00a 3032 if format.get('acodec') == 'none':
3033 return 'images'
fb04e403 3034 return 'audio only'
f49d89ee
PH
3035 if format.get('resolution') is not None:
3036 return format['resolution']
35615307
DA
3037 if format.get('width') and format.get('height'):
3038 res = '%dx%d' % (format['width'], format['height'])
3039 elif format.get('height'):
3040 res = '%sp' % format['height']
3041 elif format.get('width'):
388ae76b 3042 res = '%dx?' % format['width']
8c51aa65 3043 else:
8abeeb94 3044 res = default
8c51aa65
JMF
3045 return res
3046
c57f7757
PH
3047 def _format_note(self, fdict):
3048 res = ''
3049 if fdict.get('ext') in ['f4f', 'f4m']:
3050 res += '(unsupported) '
32f90364
PH
3051 if fdict.get('language'):
3052 if res:
3053 res += ' '
9016d76f 3054 res += '[%s] ' % fdict['language']
c57f7757
PH
3055 if fdict.get('format_note') is not None:
3056 res += fdict['format_note'] + ' '
3057 if fdict.get('tbr') is not None:
3058 res += '%4dk ' % fdict['tbr']
3059 if fdict.get('container') is not None:
3060 if res:
3061 res += ', '
3062 res += '%s container' % fdict['container']
3089bc74
S
3063 if (fdict.get('vcodec') is not None
3064 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3065 if res:
3066 res += ', '
3067 res += fdict['vcodec']
91c7271a 3068 if fdict.get('vbr') is not None:
c57f7757
PH
3069 res += '@'
3070 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3071 res += 'video@'
3072 if fdict.get('vbr') is not None:
3073 res += '%4dk' % fdict['vbr']
fbb21cf5 3074 if fdict.get('fps') is not None:
5d583bdf
S
3075 if res:
3076 res += ', '
3077 res += '%sfps' % fdict['fps']
c57f7757
PH
3078 if fdict.get('acodec') is not None:
3079 if res:
3080 res += ', '
3081 if fdict['acodec'] == 'none':
3082 res += 'video only'
3083 else:
3084 res += '%-5s' % fdict['acodec']
3085 elif fdict.get('abr') is not None:
3086 if res:
3087 res += ', '
3088 res += 'audio'
3089 if fdict.get('abr') is not None:
3090 res += '@%3dk' % fdict['abr']
3091 if fdict.get('asr') is not None:
3092 res += ' (%5dHz)' % fdict['asr']
3093 if fdict.get('filesize') is not None:
3094 if res:
3095 res += ', '
3096 res += format_bytes(fdict['filesize'])
9732d77e
PH
3097 elif fdict.get('filesize_approx') is not None:
3098 if res:
3099 res += ', '
3100 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3101 return res
91c7271a 3102
def list_formats(self, info_dict):
    """Print a table of the formats available for `info_dict`.

    Uses the detailed column layout unless 'list-formats' is among the
    'compat_opts' param or the 'listformats_table' param is False, in
    which case the four-column layout is printed. Formats whose
    'preference' is below -1000 are left out.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)

    shown = [f for f in formats if f.get('preference') is None or f['preference'] >= -1000]

    def detailed_row(f):
        more_info = ', '.join(filter(None, (
            'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
            format_field(f, 'language', '[%s]'),
            format_field(f, 'format_note'),
            format_field(f, 'container', ignore=(None, f.get('ext'))),
        )))
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            more_info,
        ]

    def legacy_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f)]

    if new_format:
        table = [detailed_row(f) for f in shown]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        table = [legacy_row(f) for f in shown]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen('[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a
PH
3149
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails.

    When the info dict has no thumbnails - the 'thumbnails' key is
    missing, None or empty - a single informational line is printed
    instead.
    """
    # `or []` guards against a missing/None value, which list() cannot consume
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3161
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a Language / Name / Formats table for a subtitles mapping.

    `subtitles` maps a language to a list of format dicts, each with an
    'ext' and optionally a 'name'. When the mapping is empty a single
    '<video_id> has no <name>' line is printed instead.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen('Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        pairs = [(fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)]
        exts, names = zip(*pairs)
        # Collapse a name shared by every format; hide it when it is the placeholder
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(['Language', 'Name', 'Formats'], rows, hideEmpty=True))
a504ced0 3179
dca08720
PH
def urlopen(self, req):
    """Start an HTTP download.

    `req` may be a URL string, which is first wrapped by
    sanitized_Request, or any object accepted by the opener's `open`.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3185
def print_debug_header(self):
    """Write the '[debug]' lines describing the runtime environment.

    Does nothing unless the 'verbose' param is truthy. Otherwise reports
    encodings, the yt-dlp version and how it was loaded, lazy loading and
    plugin extractors, compatibility options, the git HEAD (best effort),
    the Python version, external executable versions and the proxy map.
    With the 'call_home' param set, the public IP address is reported too.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding())
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    # Describe how the running code was loaded
    if hasattr(sys, 'frozen'):
        source = '(exe)'
    elif isinstance(globals().get('__loader__'), zipimporter):
        source = '(zip)'
    elif os.path.basename(sys.argv[0]) == '__main__.py':
        source = '(source)'
    else:
        source = ''
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))

    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        plugin_keys = [ie.ie_key() for ie in _PLUGIN_CLASSES]
        self._write_string('[debug] Plugin Extractors: %s\n' % plugin_keys)

    compat_opts = self.params.get('compat_opts')
    if compat_opts:
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(compat_opts))

    # Best effort only: any failure while querying git is swallowed
    try:
        git_proc = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        git_stdout, _ = process_communicate_or_kill(git_proc)
        head = git_stdout.decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: %s\n' % head)
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        name = platform.python_implementation()
        if name != 'PyPy' or not hasattr(sys, 'pypy_version_info'):
            return name
        return name + ' version %d.%d.%d' % sys.pypy_version_info[:3]

    python_details = (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name())
    self._write_string('[debug] Python version %s (%s %s) - %s\n' % python_details)

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables with a truthy version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version)
    self._write_string('[debug] exe versions: %s\n' % (exe_str or 'none'))

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        return
        # NOTE: everything below is unreachable because of the return above,
        # so the latest-version check never runs
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3275
def _setup_opener(self):
    """Build the URL opener and store it, with its settings, on the instance.

    Sets `self._socket_timeout` (600 when the 'socket_timeout' param is
    unset), `self.cookiejar` and `self._opener`.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 600

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty 'proxy' param results in an empty proxy mapping
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def _blocked_file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler.file_open = _blocked_file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3323
def encode(self, s):
    """Encode `s` with the encoding from `get_encoding()`.

    Bytes are returned untouched. A UnicodeEncodeError is re-raised with
    a configuration hint appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
3333
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3339
de6000d9 3340 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3341 write_all = self.params.get('write_all_thumbnails', False)
3342 thumbnails = []
3343 if write_all or self.params.get('writethumbnail', False):
0202b52a 3344 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3345 multiple = write_all and len(thumbnails) > 1
ec82d85a 3346
0202b52a 3347 ret = []
981052c9 3348 for t in thumbnails[::-1]:
ec82d85a 3349 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3350 suffix = '%s.' % t['id'] if multiple else ''
3351 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3352 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3353
0c3d0f51 3354 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3355 ret.append(suffix + thumb_ext)
8ba87148 3356 t['filepath'] = thumb_filename
ec82d85a
PH
3357 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3358 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3359 else:
5ef7d9bd 3360 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3361 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3362 try:
3363 uf = self.urlopen(t['url'])
d3d89c32 3364 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3365 shutil.copyfileobj(uf, thumbf)
de6000d9 3366 ret.append(suffix + thumb_ext)
ec82d85a
PH
3367 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3368 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3369 t['filepath'] = thumb_filename
3158150c 3370 except network_exceptions as err:
ec82d85a 3371 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3372 (t['url'], error_to_compat_str(err)))
6c4fd172 3373 if ret and not write_all:
3374 break
0202b52a 3375 return ret