cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de 19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474 29from string import ascii_letters
e5813e53 30from zipimport import zipimporter
961ea474 31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
7d1eb38a 38 compat_shlex_quote,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60 41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b 44)
982ee69a 45from .cookies import load_cookies
8c25f81b 46from .utils import (
eedb7ba5 47 age_restricted,
48 args_to_str,
ce02ed60 49 ContentTooShortError,
50 date_from_str,
51 DateRange,
acd69589 52 DEFAULT_OUTTMPL,
ce02ed60 53 determine_ext,
b5559424 54 determine_protocol,
732044af 55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 58 DownloadError,
c0384f22 59 encode_compat_str,
ce02ed60 60 encodeFilename,
498f5606 61 EntryNotInPlaylist,
a06916d9 62 error_to_compat_str,
8b0d7497 63 ExistingVideoReached,
590bc6f6 64 expand_path,
ce02ed60 65 ExtractorError,
e29663c6 66 float_or_none,
02dbf93f 67 format_bytes,
76d321f6 68 format_field,
901130bb 69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
525ef922 71 formatSeconds,
773f291d 72 GeoRestrictedError,
b0249bca 73 HEADRequest,
c9969434 74 int_or_none,
732044af 75 iri_to_uri,
773f291d 76 ISO3166Utils,
56a8fb4f 77 LazyList,
ce02ed60 78 locked_file,
0202b52a 79 make_dir,
dca08720 80 make_HTTPS_handler,
ce02ed60 81 MaxDownloadsReached,
3158150c 82 network_exceptions,
cd6fc19e 83 orderedSet,
a06916d9 84 OUTTMPL_TYPES,
b7ab0590 85 PagedList,
083c9df9 86 parse_filesize,
91410c9b 87 PerRequestProxyHandler,
dca08720 88 platform_name,
eedb7ba5 89 PostProcessingError,
ce02ed60 90 preferredencoding,
eedb7ba5 91 prepend_extension,
a06916d9 92 process_communicate_or_kill,
51fb4995 93 register_socks_protocols,
a06916d9 94 RejectedVideoReached,
cfb56d1a 95 render_table,
eedb7ba5 96 replace_extension,
ce02ed60 97 SameFileError,
98 sanitize_filename,
1bb5c511 99 sanitize_path,
dcf77cf1 100 sanitize_url,
67dda517 101 sanitized_Request,
e5660ee6 102 std_headers,
1211bb6d 103 str_or_none,
e29663c6 104 strftime_or_none,
ce02ed60 105 subtitles_filename,
51d9739f 106 ThrottledDownload,
732044af 107 to_high_limit_path,
324ad820 108 traverse_obj,
6033d980 109 try_get,
ce02ed60 110 UnavailableVideoError,
29eb5174 111 url_basename,
7d1eb38a 112 variadic,
58b1f00d 113 version_tuple,
ce02ed60 114 write_json_file,
115 write_string,
6a3f4c3f 116 YoutubeDLCookieProcessor,
dca08720 117 YoutubeDLHandler,
fca6dba8 118 YoutubeDLRedirectHandler,
ce02ed60 119)
a0e07d31 120from .cache import Cache
52a8a1e1 121from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126)
4c54b89e 127from .extractor.openload import PhantomJSwrapper
52a8a1e1 128from .downloader import (
dbf5416a 129 FFmpegFD,
52a8a1e1 130 get_suitable_downloader,
131 shorten_protocol_name
132)
4c83c967 133from .downloader.rtmp import rtmpdump_version
4f026faf 134from .postprocessor import (
e36d50c5 135 get_postprocessor,
136 FFmpegFixupDurationPP,
f17f8651 137 FFmpegFixupM3u8PP,
62cd676c 138 FFmpegFixupM4aPP,
6271f1ca 139 FFmpegFixupStretchedPP,
e36d50c5 140 FFmpegFixupTimestampPP,
4f026faf 141 FFmpegMergerPP,
142 FFmpegPostProcessor,
0202b52a 143 MoveFilesAfterDownloadPP,
4f026faf 144)
dca08720 145from .version import __version__
8222d8de 146
e9c0cdd3 147if compat_os_name == 'nt':
148 import ctypes
149
2459b6e1 150
8222d8de 151class YoutubeDL(object):
152 """YoutubeDL class.
153
154 YoutubeDL objects are the ones responsible for downloading the
155 actual video file and writing it to disk if the user has requested
156 it, among some other tasks. In most cases there should be one per
157 program. Since, given a video URL, the downloader doesn't know how to
158 extract all the needed information (a task that InfoExtractors do), it
159 has to pass the URL to one of them.
160
161 For this, YoutubeDL objects have a method that allows
162 InfoExtractors to be registered in a given order. When it is passed
163 a URL, the YoutubeDL object hands it to the first InfoExtractor it
164 finds that reports being able to handle it. The InfoExtractor extracts
165 all the information about the video or videos the URL refers to, and
166 YoutubeDL processes the extracted information, possibly using a File
167 Downloader to download the video.
168
169 YoutubeDL objects accept a lot of parameters. In order not to saturate
170 the object constructor with arguments, it receives a dictionary of
171 options instead. These options are available through the params
172 attribute for the InfoExtractors to use. The YoutubeDL also
173 registers itself as the downloader in charge for the InfoExtractors
174 that are added to it, so this is a "mutual registration".
175
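    A minimal embedding sketch (illustrative only, not part of the original
    docstring; the option names used are the params documented below under
    "Available options"):

        from yt_dlp import YoutubeDL

        ydl_opts = {'format': 'best'}
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])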
176 Available options:
177
178 username: Username for authentication purposes.
179 password: Password for authentication purposes.
180940e0 180 videopassword: Password for accessing a video.
1da50aa3 181 ap_mso: Adobe Pass multiple-system operator identifier.
182 ap_username: Multiple-system operator account username.
183 ap_password: Multiple-system operator account password.
8222d8de 184 usenetrc: Use netrc for authentication instead.
185 verbose: Print additional info to stdout.
186 quiet: Do not print messages to stdout.
ad8915b7 187 no_warnings: Do not print out anything for warnings.
53c18592 188 forceprint: A list of templates to force print
189 forceurl: Force printing final URL. (Deprecated)
190 forcetitle: Force printing title. (Deprecated)
191 forceid: Force printing ID. (Deprecated)
192 forcethumbnail: Force printing thumbnail URL. (Deprecated)
193 forcedescription: Force printing description. (Deprecated)
194 forcefilename: Force printing final filename. (Deprecated)
195 forceduration: Force printing duration. (Deprecated)
8694c600 196 forcejson: Force printing info_dict as JSON.
63e0be34 197 dump_single_json: Force printing the info_dict of the whole playlist
198 (or video) as a single JSON line.
c25228e5 199 force_write_download_archive: Force writing download archive regardless
200 of 'skip_download' or 'simulate'.
b7b04c78 201 simulate: Do not download the video files. If unset (or None),
202 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 203 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 205 ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
c25228e5 208 format_sort: How to sort the video formats. see "Sorting Formats"
209 for more details.
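                       An illustrative value (assuming the 'res' and 'fps' sort
                       fields described in "Sorting Formats"): ['res:1080', 'fps']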
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
211 for more details.
212 allow_multiple_video_streams: Allow multiple video streams to be merged
213 into a single file
214 allow_multiple_audio_streams: Allow multiple audio streams to be merged
215 into a single file
0ba692ac 216 check_formats: Whether to test if the formats are downloadable.
217 Can be True (check all), False (check none)
218 or None (check only if requested by extractor)
4524baf0 219 paths: Dictionary of output paths. The allowed keys are 'home'
220 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 221 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 222 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 223 For compatibility with youtube-dl, a single string can also be used
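                       Illustrative values (hypothetical paths and template):
                       paths: {'home': '~/Videos', 'temp': '/tmp/yt-dlp'}
                       outtmpl: {'default': '%(title)s [%(id)s].%(ext)s'}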
a820dc72 224 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
225 restrictfilenames: Do not allow "&" and spaces in file names
226 trim_file_name: Limit length of filename (extension excluded)
4524baf0 227 windowsfilenames: Force the filenames to be windows compatible
a820dc72 228 ignoreerrors: Do not stop on download errors
7a5c1cfe 229 (Default True when running yt-dlp,
a820dc72 230 but False when directly accessing YoutubeDL class)
26e2805c 231 skip_playlist_after_errors: Number of allowed failures until the rest of
232 the playlist is skipped
d22dec74 233 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 234 overwrites: Overwrite all video and metadata files if True,
235 overwrite only non-video files if None
236 and don't overwrite any file if False
34488702 237 For compatibility with youtube-dl,
238 "nooverwrites" may also be used instead
8222d8de 239 playliststart: Playlist item to start at.
240 playlistend: Playlist item to end at.
c14e88f0 241 playlist_items: Specific indices of playlist to download.
ff815fe6 242 playlistreverse: Download playlist items in reverse order.
75822ca7 243 playlistrandom: Download playlist items in random order.
8222d8de 244 matchtitle: Download only matching titles.
245 rejecttitle: Reject downloads for matching titles.
8bf9319e 246 logger: Log messages to a logging.Logger instance.
8222d8de 247 logtostderr: Log messages to stderr instead of stdout.
248 writedescription: Write the video description to a .description file
249 writeinfojson: Write the video description to a .info.json file
75d43ca0 250 clean_infojson: Remove private fields from the infojson
34488702 251 getcomments: Extract video comments. This will not be written to disk
06167fbb 252 unless writeinfojson is also given
1fb07d10 253 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 254 writethumbnail: Write the thumbnail image to a file
c25228e5 255 allow_playlist_files: Whether to write playlists' description, infojson etc
256 also to disk when using the 'write*' options
ec82d85a 257 write_all_thumbnails: Write all thumbnail formats to files
732044af 258 writelink: Write an internet shortcut file, depending on the
259 current platform (.url/.webloc/.desktop)
260 writeurllink: Write a Windows internet shortcut file (.url)
261 writewebloclink: Write a macOS internet shortcut file (.webloc)
262 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 263 writesubtitles: Write the video subtitles to a file
741dd8ea 264 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 265 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 266 Downloads all the subtitles of the video
0b7f3118 267 (requires writesubtitles or writeautomaticsub)
8222d8de 268 listsubtitles: Lists all available subtitles for the video
a504ced0 269 subtitlesformat: The format code for subtitles
c32b0aab 270 subtitleslangs: List of languages of the subtitles to download (can be regex).
271 The list may contain "all" to refer to all the available
272 subtitles. The language can be prefixed with a "-" to
273 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de 274 keepvideo: Keep the video file after post-processing
275 daterange: A DateRange object, download only if the upload_date is in the range.
276 skip_download: Skip the actual download of the video file
c35f9e72 277 cachedir: Location of the cache files in the filesystem.
a0e07d31 278 False to disable filesystem cache.
47192f92 279 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899 280 age_limit: An integer representing the user's age in years.
281 Unsuitable videos for the given age are skipped.
5fe18bdb 282 min_views: An integer representing the minimum view count the video
283 must have in order to not be skipped.
284 Videos without view count information are always
285 downloaded. None for no limit.
286 max_views: An integer representing the maximum view count.
287 Videos that are more popular than that are not
288 downloaded.
289 Videos without view count information are always
290 downloaded. None for no limit.
291 download_archive: File name of a file where all downloads are recorded.
c1c9a79c 292 Videos already present in the file are not downloaded
293 again.
8a51f564 294 break_on_existing: Stop the download process after attempting to download a
295 file that is in the archive.
296 break_on_reject: Stop the download process when encountering a video that
297 has been filtered out.
298 cookiefile: File name where cookies should be read from and dumped to
982ee69a 299 cookiesfrombrowser: A tuple containing the name of the browser and the profile
300 name/path from where cookies are loaded.
301 Eg: ('chrome', ) or (vivaldi, 'default')
a1ee09e8 302 nocheckcertificate:Do not verify SSL certificates
7e8c0af0 303 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
304 At the moment, this is only supported by YouTube.
a1ee09e8 305 proxy: URL of the proxy server to use
38cce791 306 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 307 on geo-restricted sites.
e344693b 308 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b 309 bidi_workaround: Work around buggy terminals without bidirectional text
310 support, using fribidi
a0ddb8a2 311 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 312 include_ads: Download ads as well
04b4d394 313 default_search: Prepend this string if an input url is not valid.
314 'auto' for elaborate guessing
62fec3b2 315 encoding: Use this encoding instead of the system-specified.
e8ee972c 316 extract_flat: Do not resolve URLs, return the immediate result.
057a5206 317 Pass in 'in_playlist' to only show this behavior for
318 playlist items.
4f026faf 319 postprocessors: A list of dictionaries, each with an entry
71b640cc 320 * key: The name of the postprocessor. See
7a5c1cfe 321 yt_dlp/postprocessor/__init__.py for a list.
56d868db 322 * when: When to run the postprocessor. Can be one of
323 pre_process|before_dl|post_process|after_move.
324 Assumed to be 'post_process' if not given
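                       A sketch of a typical entry (assuming the FFmpegExtractAudio
                       postprocessor and its 'preferredcodec' argument):
                       postprocessors: [{'key': 'FFmpegExtractAudio',
                                         'preferredcodec': 'mp3',
                                         'when': 'post_process'}]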
ab8e5e51 325 post_hooks: A list of functions that get called as the final step
326 for each video file, after all postprocessors have been
327 called. The filename will be passed as the only argument.
71b640cc 328 progress_hooks: A list of functions that get called on download
329 progress, with a dictionary with the entries
5cda4eda 330 * status: One of "downloading", "error", or "finished".
ee69b99a 331 Check this first and ignore unknown values.
3ba7740d 332 * info_dict: The extracted info_dict
71b640cc 333
5cda4eda 334 If status is one of "downloading", or "finished", the
ee69b99a 335 following properties may also be present:
336 * filename: The final filename (always present)
5cda4eda 337 * tmpfilename: The filename we're currently writing to
71b640cc 338 * downloaded_bytes: Bytes on disk
339 * total_bytes: Size of the whole file, None if unknown
5cda4eda 340 * total_bytes_estimate: Guess of the eventual file size,
341 None if unavailable.
342 * elapsed: The number of seconds since download started.
71b640cc 343 * eta: The estimated time in seconds, None if unknown
344 * speed: The download speed in bytes/second, None if
345 unknown
5cda4eda 346 * fragment_index: The counter of the currently
347 downloaded video fragment.
348 * fragment_count: The number of fragments (= individual
349 files that will be merged)
71b640cc 350
351 Progress hooks are guaranteed to be called at least once
352 (with status "finished") if the download is successful.
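                       A minimal hook sketch (illustrative; it relies only on the
                       'status' and 'filename' entries documented above):

                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Finished downloading %s' % d.get('filename'))

                           progress_hooks: [my_hook]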
45598f15 353 merge_output_format: Extension to use when merging formats.
6b591b29 354 final_ext: Expected final extension; used to detect when the file was
355 already downloaded and converted. "merge_output_format" is
356 replaced by this extension when given
6271f1ca 357 fixup: Automatically correct known faults of the file.
358 One of:
359 - "never": do nothing
360 - "warn": only emit a warning
361 - "detect_or_warn": check whether we can do anything
62cd676c 362 about it, warn otherwise (default)
504f20dd 363 source_address: Client-side IP address to bind to.
6ec6cb4e 364 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 365 yt-dlp servers for debugging. (BROKEN)
1cf376f5 366 sleep_interval_requests: Number of seconds to sleep between requests
367 during extraction
7aa589a5 368 sleep_interval: Number of seconds to sleep before each download when
369 used alone or a lower bound of a range for randomized
370 sleep before each download (minimum possible number
371 of seconds to sleep) when used along with
372 max_sleep_interval.
373 max_sleep_interval:Upper bound of a range for randomized sleep before each
374 download (maximum possible number of seconds to sleep).
375 Must only be used along with sleep_interval.
376 Actual sleep time will be a random float from range
377 [sleep_interval; max_sleep_interval].
1cf376f5 378 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a 379 listformats: Print an overview of available video formats and exit.
380 list_thumbnails: Print a table of all thumbnails and exit.
347de493 381 match_filter: A function that gets called with the info_dict of
382 every video.
383 If it returns a message, the video is ignored.
384 If it returns None, the video is downloaded.
385 match_filter_func in utils.py is one example for this.
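                       A hand-rolled example (illustrative only):

                           def skip_short_videos(info_dict, *, incomplete=False):
                               duration = info_dict.get('duration')
                               if duration and duration < 60:
                                   return 'Skipping video shorter than a minute'
                               return None  # None means "download it"

                           match_filter: skip_short_videos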
7e5db8c9 386 no_color: Do not emit color codes in output.
0a840f58 387 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 388 HTTP header
0a840f58 389 geo_bypass_country:
773f291d 390 Two-letter ISO 3166-2 country code that will be used for
391 explicit geographic restriction bypassing via faking
504f20dd 392 X-Forwarded-For HTTP header
5f95927a 393 geo_bypass_ip_block:
394 IP range in CIDR notation that will be used similarly to
504f20dd 395 geo_bypass_country
71b640cc 396
85729c51 397 The following options determine which downloader is picked:
52a8a1e1 398 external_downloader: A dictionary of protocol keys and the executable of the
399 external downloader to use for it. The allowed protocols
400 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
401 Set the value to 'native' to use the native downloader
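                       An illustrative mapping (both values appear elsewhere in
                       these docs): {'m3u8': 'native', 'default': 'ffmpeg'}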
402 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
403 or {'m3u8': 'ffmpeg'} instead.
404 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a 405 if True, otherwise use ffmpeg/avconv if False, otherwise
406 use downloader suggested by extractor if None.
53ed7066 407 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 408 The following options do not work when used through the API:
409 filename, abort-on-error, multistreams, no-live-chat,
b51d2ae3 410 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 411 Refer to __init__.py for their implementation
fe7e0c98 412
8222d8de 413 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 414 the downloader (see yt_dlp/downloader/common.py):
51d9739f 415 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
416 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
417 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67 418
419 The following options are used by the post processors:
d4a24f40 420 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 421 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117 422 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
423 to the binary or its containing directory.
43820c03 424 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 425 and a list of additional command-line arguments for the
426 postprocessor/executable. The dict can also have "PP+EXE" keys
427 which are used when the given exe is used by the given PP.
428 Use 'default' as the name for arguments to be passed to all PP
429 For compatibility with youtube-dl, a single list of args
430 can also be used
e409895f 431
432 The following options are used by the extractors:
62bff2c1 433 extractor_retries: Number of times to retry for known errors
434 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 435 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 436 discontinuities such as ad breaks (default: False)
5d3a0e79 437 extractor_args: A dictionary of arguments to be passed to the extractors.
438 See "EXTRACTOR ARGUMENTS" for details.
439 Eg: {'youtube': {'skip': ['dash', 'hls']}}
440 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
441 If True (default), DASH manifests and related
62bff2c1 442 data will be downloaded and processed by extractor.
443 You can reduce network I/O by disabling it if you don't
444 care about DASH. (only for youtube)
5d3a0e79 445 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
446 If True (default), HLS manifests and related
62bff2c1 447 data will be downloaded and processed by extractor.
448 You can reduce network I/O by disabling it if you don't
449 care about HLS. (only for youtube)
8222d8de 450 """
451
c9969434 452 _NUMERIC_FIELDS = set((
453 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
454 'timestamp', 'upload_year', 'upload_month', 'upload_day',
455 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
456 'average_rating', 'comment_count', 'age_limit',
457 'start_time', 'end_time',
458 'chapter_number', 'season_number', 'episode_number',
459 'track_number', 'disc_number', 'release_year',
460 'playlist_index',
461 ))
462
8222d8de 463 params = None
8b7491c8 464 _ies = {}
56d868db 465 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 466 _printed_messages = set()
1cf376f5 467 _first_webpage_request = True
8222d8de 468 _download_retcode = None
469 _num_downloads = None
30a074c2 470 _playlist_level = 0
471 _playlist_urls = set()
8222d8de 472 _screen_file = None
473
3511266b 474 def __init__(self, params=None, auto_init=True):
8222d8de 475 """Create a FileDownloader object with the given options."""
e9f9a10f 476 if params is None:
477 params = {}
8b7491c8 478 self._ies = {}
56c73665 479 self._ies_instances = {}
56d868db 480 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 481 self._printed_messages = set()
1cf376f5 482 self._first_webpage_request = True
ab8e5e51 483 self._post_hooks = []
933605d7 484 self._progress_hooks = []
8222d8de 485 self._download_retcode = 0
486 self._num_downloads = 0
487 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 488 self._err_file = sys.stderr
4abf617b 489 self.params = {
490 # Default parameters
491 'nocheckcertificate': False,
492 }
493 self.params.update(params)
a0e07d31 494 self.cache = Cache(self)
34308b30 495
a61f4b28 496 if sys.version_info < (3, 6):
497 self.report_warning(
0181adef 498 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 499
88acdbc2 500 if self.params.get('allow_unplayable_formats'):
501 self.report_warning(
502 'You have asked for unplayable formats to be listed/downloaded. '
503 'This is a developer option intended for debugging. '
504 'If you experience any issues while using this option, DO NOT open a bug report')
505
be5df5ee 506 def check_deprecated(param, option, suggestion):
507 if self.params.get(param) is not None:
53ed7066 508 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee 509 return True
510 return False
511
512 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791 513 if self.params.get('geo_verification_proxy') is None:
514 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
515
0d1bb027 516 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
517 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 518 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 519
520 for msg in self.params.get('warnings', []):
521 self.report_warning(msg)
522
b868936c 523 if self.params.get('overwrites') is None:
524 self.params.pop('overwrites', None)
525 elif self.params.get('nooverwrites') is not None:
526 # nooverwrites was unnecessarily changed to overwrites
527 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
528 # This ensures compatibility with both keys
529 self.params['overwrites'] = not self.params['nooverwrites']
530 else:
531 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 532
0783b09b 533 if params.get('bidi_workaround', False):
1c088fa8 534 try:
535 import pty
536 master, slave = pty.openpty()
003c69a8 537 width = compat_get_terminal_size().columns
1c088fa8 538 if width is None:
539 width_args = []
540 else:
541 width_args = ['-w', str(width)]
5d681e96 542 sp_kwargs = dict(
1c088fa8 543 stdin=subprocess.PIPE,
544 stdout=slave,
545 stderr=self._err_file)
5d681e96 546 try:
547 self._output_process = subprocess.Popen(
548 ['bidiv'] + width_args, **sp_kwargs
549 )
550 except OSError:
5d681e96 551 self._output_process = subprocess.Popen(
552 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
553 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 554 except OSError as ose:
66e7ace1 555 if ose.errno == errno.ENOENT:
6febd1c1 556 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8 557 else:
558 raise
0783b09b 559
3089bc74 560 if (sys.platform != 'win32'
561 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
562 and not params.get('restrictfilenames', False)):
e9137224 563 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 564 self.report_warning(
6febd1c1 565 'Assuming --restrict-filenames since file system encoding '
1b725173 566 'cannot encode all characters. '
6febd1c1 567 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 568 self.params['restrictfilenames'] = True
34308b30 569
de6000d9 570 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 571
187986a8 572 # Creating format selector here allows us to catch syntax errors before the extraction
573 self.format_selector = (
574 None if self.params.get('format') is None
575 else self.build_format_selector(self.params['format']))
576
dca08720 577 self._setup_opener()
578
4cd0a709 579 """Preload the archive, if any is specified"""
580 def preload_download_archive(fn):
581 if fn is None:
582 return False
0760b0a7 583 self.write_debug('Loading archive file %r\n' % fn)
4cd0a709 584 try:
585 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
586 for line in archive_file:
587 self.archive.add(line.strip())
588 except IOError as ioe:
589 if ioe.errno != errno.ENOENT:
590 raise
591 return False
592 return True
593
594 self.archive = set()
595 preload_download_archive(self.params.get('download_archive'))
596
3511266b 597 if auto_init:
598 self.print_debug_header()
599 self.add_default_info_extractors()
600
4f026faf 601 for pp_def_raw in self.params.get('postprocessors', []):
4f026faf 602 pp_def = dict(pp_def_raw)
fd7cfb64 603 when = pp_def.pop('when', 'post_process')
604 pp_class = get_postprocessor(pp_def.pop('key'))
4f026faf 605 pp = pp_class(self, **compat_kwargs(pp_def))
5bfa4862 606 self.add_post_processor(pp, when=when)
4f026faf 607
ab8e5e51 608 for ph in self.params.get('post_hooks', []):
609 self.add_post_hook(ph)
610
71b640cc 611 for ph in self.params.get('progress_hooks', []):
612 self.add_progress_hook(ph)
613
51fb4995 614 register_socks_protocols()
615
7d4111ed 616 def warn_if_short_id(self, argv):
617 # short YouTube ID starting with dash?
618 idxs = [
619 i for i, a in enumerate(argv)
620 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
621 if idxs:
622 correct_argv = (
7a5c1cfe 623 ['yt-dlp']
3089bc74 624 + [a for i, a in enumerate(argv) if i not in idxs]
625 + ['--'] + [argv[i] for i in idxs]
7d4111ed 626 )
627 self.report_warning(
628 'Long argument string detected. '
629 'Use -- to separate parameters and URLs, like this:\n%s\n' %
630 args_to_str(correct_argv))
631
8222d8de 632 def add_info_extractor(self, ie):
633 """Add an InfoExtractor object to the end of the list."""
8b7491c8 634 ie_key = ie.ie_key()
635 self._ies[ie_key] = ie
e52d7f85 636 if not isinstance(ie, type):
8b7491c8 637 self._ies_instances[ie_key] = ie
e52d7f85 638 ie.set_downloader(self)
8222d8de 639
8b7491c8 640 def _get_info_extractor_class(self, ie_key):
641 ie = self._ies.get(ie_key)
642 if ie is None:
643 ie = get_info_extractor(ie_key)
644 self.add_info_extractor(ie)
645 return ie
646
56c73665 647 def get_info_extractor(self, ie_key):
648 """
649 Get an instance of an IE with name ie_key, it will try to get one from
650 the _ies list, if there's no instance it will create a new one and add
651 it to the extractor list.
652 """
653 ie = self._ies_instances.get(ie_key)
654 if ie is None:
655 ie = get_info_extractor(ie_key)()
656 self.add_info_extractor(ie)
657 return ie
658
023fa8c4 659 def add_default_info_extractors(self):
660 """
661 Add the InfoExtractors returned by gen_extractors to the end of the list
662 """
e52d7f85 663 for ie in gen_extractor_classes():
023fa8c4 664 self.add_info_extractor(ie)
665
56d868db 666 def add_post_processor(self, pp, when='post_process'):
8222d8de 667 """Add a PostProcessor object to the end of the chain."""
5bfa4862 668 self._pps[when].append(pp)
8222d8de 669 pp.set_downloader(self)
670
ab8e5e51 671 def add_post_hook(self, ph):
672 """Add the post hook"""
673 self._post_hooks.append(ph)
674
933605d7 675 def add_progress_hook(self, ph):
676 """Add the progress hook (currently only for the file downloader)"""
677 self._progress_hooks.append(ph)
8ab470f1 678
1c088fa8 679 def _bidi_workaround(self, message):
5d681e96 680 if not hasattr(self, '_output_channel'):
1c088fa8 681 return message
682
5d681e96 683 assert hasattr(self, '_output_process')
11b85ce6 684 assert isinstance(message, compat_str)
6febd1c1 685 line_count = message.count('\n') + 1
686 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 687 self._output_process.stdin.flush()
6febd1c1 688 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 689 for _ in range(line_count))
6febd1c1 690 return res[:-len('\n')]
1c088fa8 691
b35496d8 692 def _write_string(self, message, out=None, only_once=False):
693 if only_once:
694 if message in self._printed_messages:
695 return
696 self._printed_messages.add(message)
697 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 698
848887eb 699 def to_stdout(self, message, skip_eol=False, quiet=False):
0760b0a7 700 """Print message to stdout"""
8bf9319e 701 if self.params.get('logger'):
43afe285 702 self.params['logger'].debug(message)
835a1478 703 elif not quiet or self.params.get('verbose'):
704 self._write_string(
705 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
706 self._err_file if quiet else self._screen_file)
8222d8de 707
b35496d8 708 def to_stderr(self, message, only_once=False):
0760b0a7 709 """Print message to stderr"""
11b85ce6 710 assert isinstance(message, compat_str)
8bf9319e 711 if self.params.get('logger'):
43afe285 712 self.params['logger'].error(message)
713 else:
b35496d8 714 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
8222d8de 715
1e5b9a95 716 def to_console_title(self, message):
717 if not self.params.get('consoletitle', False):
718 return
4bede0d8 719 if compat_os_name == 'nt':
720 if ctypes.windll.kernel32.GetConsoleWindow():
721 # c_wchar_p() might not be necessary if `message` is
722 # already of type unicode()
723 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 724 elif 'TERM' in os.environ:
b46696bd 725 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 726
bdde425c 727 def save_console_title(self):
728 if not self.params.get('consoletitle', False):
729 return
b7b04c78 730 if self.params.get('simulate'):
94c3442e 731 return
4bede0d8 732 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 733 # Save the title on stack
734f90bb 734 self._write_string('\033[22;0t', self._screen_file)
bdde425c 735
736 def restore_console_title(self):
737 if not self.params.get('consoletitle', False):
738 return
b7b04c78 739 if self.params.get('simulate'):
94c3442e 740 return
4bede0d8 741 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 742 # Restore the title from stack
734f90bb 743 self._write_string('\033[23;0t', self._screen_file)
bdde425c 744
745 def __enter__(self):
746 self.save_console_title()
747 return self
748
749 def __exit__(self, *args):
750 self.restore_console_title()
f89197d7 751
dca08720 752 if self.params.get('cookiefile') is not None:
1bab3437 753 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 754
8222d8de 755 def trouble(self, message=None, tb=None):
756 """Determine action to take when a download problem appears.
757
758 Depending on if the downloader has been configured to ignore
759 download errors or not, this method may throw an exception or
760 not when errors are found, after printing the message.
761
762 tb, if given, is additional traceback information.
763 """
764 if message is not None:
765 self.to_stderr(message)
766 if self.params.get('verbose'):
767 if tb is None:
768 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 769 tb = ''
8222d8de 770 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 771 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 772 tb += encode_compat_str(traceback.format_exc())
8222d8de 773 else:
774 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 775 tb = ''.join(tb_data)
c19bc311 776 if tb:
777 self.to_stderr(tb)
8222d8de 778 if not self.params.get('ignoreerrors', False):
779 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
780 exc_info = sys.exc_info()[1].exc_info
781 else:
782 exc_info = sys.exc_info()
783 raise DownloadError(message, exc_info)
784 self._download_retcode = 1
785
0760b0a7 786 def to_screen(self, message, skip_eol=False):
787 """Print message to stdout if not in quiet mode"""
788 self.to_stdout(
789 message, skip_eol, quiet=self.params.get('quiet', False))
790
c84aeac6 791 def report_warning(self, message, only_once=False):
8222d8de 792 '''
793 Print the message to stderr, it will be prefixed with 'WARNING:'
794 If stderr is a tty file the 'WARNING:' will be colored
795 '''
6d07ce01 796 if self.params.get('logger') is not None:
797 self.params['logger'].warning(message)
8222d8de 798 else:
ad8915b7 799 if self.params.get('no_warnings'):
800 return
e9c0cdd3 801 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6d07ce01 802 _msg_header = '\033[0;33mWARNING:\033[0m'
803 else:
804 _msg_header = 'WARNING:'
805 warning_message = '%s %s' % (_msg_header, message)
b35496d8 806 self.to_stderr(warning_message, only_once)
8222d8de 807
808 def report_error(self, message, tb=None):
809 '''
810 Do the same as trouble, but prefixes the message with 'ERROR:', colored
811 in red if stderr is a tty file.
812 '''
e9c0cdd3 813 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6febd1c1 814 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 815 else:
6febd1c1 816 _msg_header = 'ERROR:'
817 error_message = '%s %s' % (_msg_header, message)
8222d8de 818 self.trouble(error_message, tb)
819
b35496d8 820 def write_debug(self, message, only_once=False):
0760b0a7 821 '''Log debug message or Print message to stderr'''
822 if not self.params.get('verbose', False):
823 return
824 message = '[debug] %s' % message
825 if self.params.get('logger'):
826 self.params['logger'].debug(message)
827 else:
b35496d8 828 self.to_stderr(message, only_once)
0760b0a7 829
8222d8de 830 def report_file_already_downloaded(self, file_name):
831 """Report file has already been fully downloaded."""
832 try:
6febd1c1 833 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 834 except UnicodeEncodeError:
6febd1c1 835 self.to_screen('[download] The file has already been downloaded')
8222d8de 836
0c3d0f51 837 def report_file_delete(self, file_name):
838 """Report that existing file will be deleted."""
839 try:
c25228e5 840 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 841 except UnicodeEncodeError:
c25228e5 842 self.to_screen('Deleting existing file')
0c3d0f51 843
1151c407 844 def raise_no_formats(self, info, forced=False):
845 has_drm = info.get('__has_drm')
88acdbc2 846 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
847 expected = self.params.get('ignore_no_formats_error')
848 if forced or not expected:
1151c407 849 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
850 expected=has_drm or expected)
88acdbc2 851 else:
852 self.report_warning(msg)
853
de6000d9 854 def parse_outtmpl(self):
855 outtmpl_dict = self.params.get('outtmpl', {})
856 if not isinstance(outtmpl_dict, dict):
857 outtmpl_dict = {'default': outtmpl_dict}
858 outtmpl_dict.update({
859 k: v for k, v in DEFAULT_OUTTMPL.items()
860 if not outtmpl_dict.get(k)})
861 for key, val in outtmpl_dict.items():
862 if isinstance(val, bytes):
863 self.report_warning(
864 'Parameter outtmpl is bytes, but should be a unicode string. '
865 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
866 return outtmpl_dict
867
21cd8fae 868 def get_output_path(self, dir_type='', filename=None):
869 paths = self.params.get('paths', {})
870 assert isinstance(paths, dict)
871 path = os.path.join(
872 expand_path(paths.get('home', '').strip()),
873 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
874 filename or '')
875
876 # Temporary fix for #4787
877 # 'Treat' all problem characters by passing filename through preferredencoding
878 # to workaround encoding issues with subprocess on python2 @ Windows
879 if sys.version_info < (3, 0) and sys.platform == 'win32':
880 path = encodeFilename(path, True).decode(preferredencoding())
881 return sanitize_path(path, force=self.params.get('windowsfilenames'))
882
76a264ac 883 @staticmethod
901130bb 884 def _outtmpl_expandpath(outtmpl):
885 # expand_path translates '%%' into '%' and '$$' into '$'
886 # correspondingly that is not what we want since we need to keep
887 # '%%' intact for template dict substitution step. Working around
888 # with boundary-alike separator hack.
889 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
890 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
891
892 # outtmpl should be expand_path'ed before template dict substitution
893 # because meta fields may contain env variables we don't want to
894 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
895 # title "Hello $PATH", we don't want `$PATH` to be expanded.
896 return expand_path(outtmpl).replace(sep, '')
897
898 @staticmethod
899 def escape_outtmpl(outtmpl):
900 ''' Escape any remaining strings like %s, %abc% etc. '''
901 return re.sub(
902 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
903 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
904 outtmpl)
905
906 @classmethod
907 def validate_outtmpl(cls, outtmpl):
76a264ac 908 ''' @return None or Exception object '''
7d1eb38a 909 outtmpl = re.sub(
910 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
911 lambda mobj: f'{mobj.group(0)[:-1]}s',
912 cls._outtmpl_expandpath(outtmpl))
76a264ac 913 try:
7d1eb38a 914 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 915 return None
916 except ValueError as err:
917 return err
918
143db31d 919 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
901130bb 920 """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
6e84b215 921 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 922
6e84b215 923 info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList
924 for key in ('__original_infodict', '__postprocessors'):
925 info_dict.pop(key, None)
752cda38 926 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 927 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 928 if info_dict.get('duration', None) is not None
929 else None)
752cda38 930 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
931 if info_dict.get('resolution') is None:
932 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 933
143db31d 934 # For fields playlist_index and autonumber convert all occurrences
935 # of %(field)s to %(field)0Nd for backward compatibility
936 field_size_compat_map = {
752cda38 937 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
938 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 939 }
752cda38 940
385a27fa 941 TMPL_DICT = {}
7d1eb38a 942 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
385a27fa 943 MATH_FUNCTIONS = {
944 '+': float.__add__,
945 '-': float.__sub__,
946 }
e625be0d 947 # Field is of the form key1.key2...
948 # where keys (except first) can be string, int or slice
2b8a2973 949 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
385a27fa 950 MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
951 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 952 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
953 (?P<negate>-)?
385a27fa 954 (?P<fields>{field})
955 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 956 (?:>(?P<strf_format>.+?))?
957 (?:\|(?P<default>.*?))?
385a27fa 958 $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
752cda38 959
2b8a2973 960 def _traverse_infodict(k):
961 k = k.split('.')
962 if k[0] == '':
963 k.pop(0)
964 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 965
752cda38 966 def get_value(mdict):
967 # Object traversal
2b8a2973 968 value = _traverse_infodict(mdict['fields'])
752cda38 969 # Negative
970 if mdict['negate']:
971 value = float_or_none(value)
972 if value is not None:
973 value *= -1
974 # Do maths
385a27fa 975 offset_key = mdict['maths']
976 if offset_key:
752cda38 977 value = float_or_none(value)
978 operator = None
385a27fa 979 while offset_key:
980 item = re.match(
981 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
982 offset_key).group(0)
983 offset_key = offset_key[len(item):]
984 if operator is None:
752cda38 985 operator = MATH_FUNCTIONS[item]
385a27fa 986 continue
987 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
988 offset = float_or_none(item)
989 if offset is None:
2b8a2973 990 offset = float_or_none(_traverse_infodict(item))
385a27fa 991 try:
992 value = operator(value, multiplier * offset)
993 except (TypeError, ZeroDivisionError):
994 return None
995 operator = None
752cda38 996 # Datetime formatting
997 if mdict['strf_format']:
998 value = strftime_or_none(value, mdict['strf_format'])
999
1000 return value
1001
b868936c 1002 na = self.params.get('outtmpl_na_placeholder', 'NA')
1003
6e84b215 1004 def _dumpjson_default(obj):
1005 if isinstance(obj, (set, LazyList)):
1006 return list(obj)
1007 raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1008
752cda38 1009 def create_key(outer_mobj):
1010 if not outer_mobj.group('has_key'):
901130bb 1011 return f'%{outer_mobj.group(0)}'
752cda38 1012 key = outer_mobj.group('key')
752cda38 1013 mobj = re.match(INTERNAL_FORMAT_RE, key)
1014 if mobj is None:
9fea350f 1015 value, default, mobj = None, na, {'fields': ''}
752cda38 1016 else:
e625be0d 1017 mobj = mobj.groupdict()
752cda38 1018 default = mobj['default'] if mobj['default'] is not None else na
1019 value = get_value(mobj)
1020
b868936c 1021 fmt = outer_mobj.group('format')
752cda38 1022 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1023 fmt = '0{:d}d'.format(field_size_compat_map[key])
1024
1025 value = default if value is None else value
752cda38 1026
7d1eb38a 1027 str_fmt = f'{fmt[:-1]}s'
1028 if fmt[-1] == 'l':
1029 value, fmt = ', '.join(variadic(value)), str_fmt
1030 elif fmt[-1] == 'j':
6e84b215 1031 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
7d1eb38a 1032 elif fmt[-1] == 'q':
1033 value, fmt = compat_shlex_quote(str(value)), str_fmt
1034 elif fmt[-1] == 'c':
1035 value = str(value)
76a264ac 1036 if value is None:
1037 value, fmt = default, 's'
1038 else:
1039 value = value[0]
1040 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1041 value = float_or_none(value)
752cda38 1042 if value is None:
1043 value, fmt = default, 's'
901130bb 1044
752cda38 1045 if sanitize:
1046 if fmt[-1] == 'r':
1047 # If value is an object, sanitize might convert it to a string
1048 # So we convert it to repr first
7d1eb38a 1049 value, fmt = repr(value), str_fmt
639f1cea 1050 if fmt[-1] in 'csr':
9fea350f 1051 value = sanitize(mobj['fields'].split('.')[-1], value)
901130bb 1052
b868936c 1053 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1054 TMPL_DICT[key] = value
b868936c 1055 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1056
385a27fa 1057 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1058
de6000d9 1059 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1060 try:
586a91b6 1061 sanitize = lambda k, v: sanitize_filename(
45598aab 1062 compat_str(v),
1bb5c511 1063 restricted=self.params.get('restrictfilenames'),
40df485f 1064 is_id=(k == 'id' or k.endswith('_id')))
de6000d9 1065 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
143db31d 1066 outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
901130bb 1067 outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1068 filename = outtmpl % template_dict
15da37c7 1069
143db31d 1070 force_ext = OUTTMPL_TYPES.get(tmpl_type)
de6000d9 1071 if force_ext is not None:
752cda38 1072 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1073
bdc3fd2f 1074 # https://github.com/blackjack4494/youtube-dlc/issues/85
1075 trim_file_name = self.params.get('trim_file_name', False)
1076 if trim_file_name:
1077 fn_groups = filename.rsplit('.')
1078 ext = fn_groups[-1]
1079 sub_ext = ''
1080 if len(fn_groups) > 2:
1081 sub_ext = fn_groups[-2]
1082 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1083
0202b52a 1084 return filename
8222d8de 1085 except ValueError as err:
6febd1c1 1086 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de 1087 return None
1088
de6000d9 1089 def prepare_filename(self, info_dict, dir_type='', warn=False):
1090 """Generate the output filename."""
21cd8fae 1091
de6000d9 1092 filename = self._prepare_filename(info_dict, dir_type or 'default')
1093
c84aeac6 1094 if warn:
21cd8fae 1095 if not self.params.get('paths'):
de6000d9 1096 pass
1097 elif filename == '-':
c84aeac6 1098 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1099 elif os.path.isabs(filename):
c84aeac6 1100 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1101 if filename == '-' or not filename:
1102 return filename
1103
21cd8fae 1104 return self.get_output_path(dir_type, filename)
0202b52a 1105
120fe513 1106 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1107 """ Returns None if the file should be downloaded """
8222d8de 1108
c77495e3 1109 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1110
8b0d7497 1111 def check_filter():
8b0d7497 1112 if 'title' in info_dict:
1113 # This can happen when we're just evaluating the playlist
1114 title = info_dict['title']
1115 matchtitle = self.params.get('matchtitle', False)
1116 if matchtitle:
1117 if not re.search(matchtitle, title, re.IGNORECASE):
1118 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1119 rejecttitle = self.params.get('rejecttitle', False)
1120 if rejecttitle:
1121 if re.search(rejecttitle, title, re.IGNORECASE):
1122 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1123 date = info_dict.get('upload_date')
1124 if date is not None:
1125 dateRange = self.params.get('daterange', DateRange())
1126 if date not in dateRange:
1127 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1128 view_count = info_dict.get('view_count')
1129 if view_count is not None:
1130 min_views = self.params.get('min_views')
1131 if min_views is not None and view_count < min_views:
1132 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1133 max_views = self.params.get('max_views')
1134 if max_views is not None and view_count > max_views:
1135 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1136 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1137 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1138
8f18aca8 1139 match_filter = self.params.get('match_filter')
1140 if match_filter is not None:
1141 try:
1142 ret = match_filter(info_dict, incomplete=incomplete)
1143 except TypeError:
1144 # For backward compatibility
1145 ret = None if incomplete else match_filter(info_dict)
1146 if ret is not None:
1147 return ret
8b0d7497 1148 return None
1149
c77495e3 1150 if self.in_download_archive(info_dict):
1151 reason = '%s has already been recorded in the archive' % video_title
1152 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1153 else:
1154 reason = check_filter()
1155 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1156 if reason is not None:
120fe513 1157 if not silent:
1158 self.to_screen('[download] ' + reason)
c77495e3 1159 if self.params.get(break_opt, False):
1160 raise break_err()
8b0d7497 1161 return reason
fe7e0c98 1162
b6c45014
JMF
1163 @staticmethod
1164 def add_extra_info(info_dict, extra_info):
1165 '''Set the keys from extra_info in info dict if they are missing'''
1166 for key, value in extra_info.items():
1167 info_dict.setdefault(key, value)
1168
409e1828 1169 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1170 process=True, force_generic_extractor=False):
41d1cca3 1171 """
1172 Return a list with a dictionary for each video extracted.
1173
1174 Arguments:
1175 url -- URL to extract
1176
1177 Keyword arguments:
1178 download -- whether to download videos during extraction
1179 ie_key -- extractor key hint
1180 extra_info -- dictionary containing the extra values to add to each result
1181 process -- whether to resolve all unresolved references (URLs, playlist items),
1182 must be True for download to work.
1183 force_generic_extractor -- force using the generic extractor
1184 """
fe7e0c98 1185
409e1828 1186 if extra_info is None:
1187 extra_info = {}
1188
61aa5ba3 1189 if not ie_key and force_generic_extractor:
d22dec74 1190 ie_key = 'Generic'
1191
8222d8de 1192 if ie_key:
8b7491c8 1193 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de 1194 else:
1195 ies = self._ies
1196
8b7491c8 1197 for ie_key, ie in ies.items():
8222d8de 1198 if not ie.suitable(url):
1199 continue
1200
1201 if not ie.working():
6febd1c1 1202 self.report_warning('The program functionality for this site has been marked as broken, '
1203 'and will probably not work.')
8222d8de 1204
1151c407 1205 temp_id = ie.get_temp_id(url)
a0566bbf 1206 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1207 self.to_screen("[%s] %s: has already been recorded in archive" % (
1208 ie_key, temp_id))
1209 break
8b7491c8 1210 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1211 else:
1212 self.report_error('no suitable InfoExtractor for URL %s' % url)
1213
8e5fecc8 1214 def __handle_extraction_exceptions(func):
1215
a0566bbf 1216 def wrapper(self, *args, **kwargs):
1217 try:
1218 return func(self, *args, **kwargs)
773f291d 1219 except GeoRestrictedError as e:
1220 msg = e.msg
1221 if e.countries:
1222 msg += '\nThis video is available in %s.' % ', '.join(
1223 map(ISO3166Utils.short2full, e.countries))
1224 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1225 self.report_error(msg)
fb043a6e 1226 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1227 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1228 except ThrottledDownload:
1229 self.to_stderr('\r')
1230 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1231 return wrapper(self, *args, **kwargs)
8e5fecc8 1232 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
d3e5bbf4 1233 raise
8222d8de 1234 except Exception as e:
8e5fecc8 1235 if self.params.get('ignoreerrors', False):
9b9c5355 1236 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de 1237 else:
1238 raise
a0566bbf 1239 return wrapper
1240
1241 @__handle_extraction_exceptions
58f197b7 1242 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1243 ie_result = ie.extract(url)
1244 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1245 return
1246 if isinstance(ie_result, list):
1247 # Backwards compatibility: old IE result format
1248 ie_result = {
1249 '_type': 'compat_list',
1250 'entries': ie_result,
1251 }
e37d0efb 1252 if extra_info.get('original_url'):
1253 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1254 self.add_default_extra_info(ie_result, ie, url)
1255 if process:
1256 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1257 else:
a0566bbf 1258 return ie_result
fe7e0c98 1259
ea38e55f 1260 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1261 if url is not None:
1262 self.add_extra_info(ie_result, {
1263 'webpage_url': url,
1264 'original_url': url,
1265 'webpage_url_basename': url_basename(url),
1266 })
1267 if ie is not None:
1268 self.add_extra_info(ie_result, {
1269 'extractor': ie.IE_NAME,
1270 'extractor_key': ie.ie_key(),
1271 })
ea38e55f 1272
58adec46 1273 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de 1274 """
1275 Take the result of the ie (may be modified) and resolve all unresolved
1276 references (URLs, playlist items).
1277
1278 It will also download the videos if 'download'.
1279 Returns the resolved ie_result.
1280 """
58adec46 1281 if extra_info is None:
1282 extra_info = {}
e8ee972c 1283 result_type = ie_result.get('_type', 'video')
1284
057a5206 1285 if result_type in ('url', 'url_transparent'):
134c6ea8 1286 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1287 if ie_result.get('original_url'):
1288 extra_info.setdefault('original_url', ie_result['original_url'])
1289
057a5206 1290 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1291 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1292 or extract_flat is True):
ecb54191 1293 info_copy = ie_result.copy()
6033d980 1294 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
4614bc22 1295 if not ie_result.get('id'):
1296 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1297 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1298 self.add_extra_info(info_copy, extra_info)
ecb54191 1299 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1300 if self.params.get('force_write_download_archive', False):
1301 self.record_download_archive(info_copy)
e8ee972c
PH
1302 return ie_result
1303
8222d8de 1304 if result_type == 'video':
b6c45014 1305 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1306 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1307 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1308 if additional_urls:
e9f4ccd1 1309 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1310 if isinstance(additional_urls, compat_str):
1311 additional_urls = [additional_urls]
1312 self.to_screen(
1313 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1314 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1315 ie_result['additional_entries'] = [
1316 self.extract_info(
1317 url, download, extra_info=extra_info,
1318 force_generic_extractor=self.params.get('force_generic_extractor'))
1319 for url in additional_urls
1320 ]
1321 return ie_result
8222d8de
JMF
1322 elif result_type == 'url':
1323 # We have to add extra_info to the results because it may be
1324 # contained in a playlist
07cce701 1325 return self.extract_info(
1326 ie_result['url'], download,
1327 ie_key=ie_result.get('ie_key'),
1328 extra_info=extra_info)
7fc3fa05
PH
1329 elif result_type == 'url_transparent':
1330 # Use the information from the embedding page
1331 info = self.extract_info(
1332 ie_result['url'], ie_key=ie_result.get('ie_key'),
1333 extra_info=extra_info, download=False, process=False)
1334
1640eb09
S
1335 # extract_info may return None when ignoreerrors is enabled and
1336 # extraction failed with an error, don't crash and return early
1337 # in this case
1338 if not info:
1339 return info
1340
412c617d
PH
1341 force_properties = dict(
1342 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1343 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1344 if f in force_properties:
1345 del force_properties[f]
1346 new_result = info.copy()
1347 new_result.update(force_properties)
7fc3fa05 1348
0563f7ac
S
1349 # Extracted info may not be a video result (i.e.
1350 # info.get('_type', 'video') != 'video') but rather a URL or
1351 # url_transparent. In such cases outer metadata (from ie_result)
1352 # should be propagated to inner one (info). For this to happen
1353 # _type of info should be overridden with url_transparent. This
067aa17e 1354 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1355 if new_result.get('_type') == 'url':
1356 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1357
1358 return self.process_ie_result(
1359 new_result, download=download, extra_info=extra_info)
40fcba5e 1360 elif result_type in ('playlist', 'multi_video'):
30a074c2 1361 # Protect from infinite recursion due to recursively nested playlists
1362 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1363 webpage_url = ie_result['webpage_url']
1364 if webpage_url in self._playlist_urls:
7e85e872 1365 self.to_screen(
30a074c2 1366 '[download] Skipping already downloaded playlist: %s'
1367 % (ie_result.get('title') or ie_result.get('id')))
1368 return
7e85e872 1369
30a074c2 1370 self._playlist_level += 1
1371 self._playlist_urls.add(webpage_url)
bc516a3f 1372 self._sanitize_thumbnails(ie_result)
30a074c2 1373 try:
1374 return self.__process_playlist(ie_result, download)
1375 finally:
1376 self._playlist_level -= 1
1377 if not self._playlist_level:
1378 self._playlist_urls.clear()
8222d8de 1379 elif result_type == 'compat_list':
c9bf4114
PH
1380 self.report_warning(
1381 'Extractor %s returned a compat_list result. '
1382 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1383
8222d8de 1384 def _fixup(r):
b868936c 1385 self.add_extra_info(r, {
1386 'extractor': ie_result['extractor'],
1387 'webpage_url': ie_result['webpage_url'],
1388 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1389 'extractor_key': ie_result['extractor_key'],
1390 })
8222d8de
JMF
1391 return r
1392 ie_result['entries'] = [
b6c45014 1393 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1394 for r in ie_result['entries']
1395 ]
1396 return ie_result
1397 else:
1398 raise Exception('Invalid result type: %s' % result_type)
1399
e92caff5 1400 def _ensure_dir_exists(self, path):
1401 return make_dir(path, self.report_error)
1402
30a074c2 1403 def __process_playlist(self, ie_result, download):
1404 # We process each entry in the playlist
1405 playlist = ie_result.get('title') or ie_result.get('id')
1406 self.to_screen('[download] Downloading playlist: %s' % playlist)
1407
498f5606 1408 if 'entries' not in ie_result:
1409 raise EntryNotInPlaylist()
1410 incomplete_entries = bool(ie_result.get('requested_entries'))
1411 if incomplete_entries:
1412 def fill_missing_entries(entries, indexes):
1413 ret = [None] * max(indexes)
1414 for i, entry in zip(indexes, entries):
1415 ret[i - 1] = entry
1416 return ret
1417 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1418
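# Illustrative example (not from the original module): with
# requested_entries == [1, 4] and two extracted entries, fill_missing_entries
# pads the gaps with None so that positional indexing below stays correct:
#
#     fill_missing_entries([e1, e4], [1, 4])  ->  [e1, None, None, e4]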
30a074c2 1419 playlist_results = []
1420
56a8fb4f 1421 playliststart = self.params.get('playliststart', 1)
30a074c2 1422 playlistend = self.params.get('playlistend')
1423 # For backwards compatibility, interpret -1 as whole list
1424 if playlistend == -1:
1425 playlistend = None
1426
1427 playlistitems_str = self.params.get('playlist_items')
1428 playlistitems = None
1429 if playlistitems_str is not None:
1430 def iter_playlistitems(format):
1431 for string_segment in format.split(','):
1432 if '-' in string_segment:
1433 start, end = string_segment.split('-')
1434 for item in range(int(start), int(end) + 1):
1435 yield int(item)
1436 else:
1437 yield int(string_segment)
1438 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1439
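# Illustrative example (not from the original module): a --playlist-items value
# such as '1-3,7,5' has its ranges expanded and duplicates dropped while the
# given order is preserved:
#
#     orderedSet(iter_playlistitems('1-3,7,5'))  ->  [1, 2, 3, 7, 5]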
1440 ie_entries = ie_result['entries']
56a8fb4f 1441 msg = (
1442 'Downloading %d videos' if not isinstance(ie_entries, list)
1443 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1444
1445 if isinstance(ie_entries, list):
1446 def get_entry(i):
1447 return ie_entries[i - 1]
1448 else:
1449 if not isinstance(ie_entries, PagedList):
1450 ie_entries = LazyList(ie_entries)
1451
1452 def get_entry(i):
1453 return YoutubeDL.__handle_extraction_exceptions(
1454 lambda self, i: ie_entries[i - 1]
1455 )(self, i)
50fed816 1456
56a8fb4f 1457 entries = []
1458 for i in playlistitems or itertools.count(playliststart):
1459 if playlistitems is None and playlistend is not None and playlistend < i:
1460 break
1461 entry = None
1462 try:
50fed816 1463 entry = get_entry(i)
56a8fb4f 1464 if entry is None:
498f5606 1465 raise EntryNotInPlaylist()
56a8fb4f 1466 except (IndexError, EntryNotInPlaylist):
1467 if incomplete_entries:
1468 raise EntryNotInPlaylist()
1469 elif not playlistitems:
1470 break
1471 entries.append(entry)
120fe513 1472 try:
1473 if entry is not None:
1474 self._match_entry(entry, incomplete=True, silent=True)
1475 except (ExistingVideoReached, RejectedVideoReached):
1476 break
56a8fb4f 1477 ie_result['entries'] = entries
30a074c2 1478
56a8fb4f 1479 # Save playlist_index before re-ordering
1480 entries = [
9e598870 1481 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1482 for i, entry in enumerate(entries, 1)
1483 if entry is not None]
1484 n_entries = len(entries)
498f5606 1485
498f5606 1486 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1487 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1488 ie_result['requested_entries'] = playlistitems
1489
1490 if self.params.get('allow_playlist_files', True):
1491 ie_copy = {
1492 'playlist': playlist,
1493 'playlist_id': ie_result.get('id'),
1494 'playlist_title': ie_result.get('title'),
1495 'playlist_uploader': ie_result.get('uploader'),
1496 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1497 'playlist_index': 0,
498f5606 1498 }
1499 ie_copy.update(dict(ie_result))
1500
1501 if self.params.get('writeinfojson', False):
1502 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1503 if not self._ensure_dir_exists(encodeFilename(infofn)):
1504 return
1505 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1506 self.to_screen('[info] Playlist metadata is already present')
1507 else:
1508 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1509 try:
8012d892 1510 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
498f5606 1511 except (OSError, IOError):
1512 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1513
681de68e 1514 # TODO: This should be passed to ThumbnailsConvertor if necessary
1515 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1516
498f5606 1517 if self.params.get('writedescription', False):
1518 descfn = self.prepare_filename(ie_copy, 'pl_description')
1519 if not self._ensure_dir_exists(encodeFilename(descfn)):
1520 return
1521 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1522 self.to_screen('[info] Playlist description is already present')
1523 elif ie_result.get('description') is None:
1524 self.report_warning('There\'s no playlist description to write.')
1525 else:
1526 try:
1527 self.to_screen('[info] Writing playlist description to: ' + descfn)
1528 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1529 descfile.write(ie_result['description'])
1530 except (OSError, IOError):
1531 self.report_error('Cannot write playlist description file ' + descfn)
1532 return
30a074c2 1533
1534 if self.params.get('playlistreverse', False):
1535 entries = entries[::-1]
30a074c2 1536 if self.params.get('playlistrandom', False):
1537 random.shuffle(entries)
1538
1539 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1540
56a8fb4f 1541 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1542 failures = 0
1543 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1544 for i, entry_tuple in enumerate(entries, 1):
1545 playlist_index, entry = entry_tuple
81139999 1546 if 'playlist-index' in self.params.get('compat_opts', []):
1547 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1548 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1549 # This __x_forwarded_for_ip thing is a bit ugly but requires
1550 # minimal changes
1551 if x_forwarded_for:
1552 entry['__x_forwarded_for_ip'] = x_forwarded_for
1553 extra = {
1554 'n_entries': n_entries,
f59ae581 1555 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1556 'playlist_index': playlist_index,
1557 'playlist_autonumber': i,
30a074c2 1558 'playlist': playlist,
1559 'playlist_id': ie_result.get('id'),
1560 'playlist_title': ie_result.get('title'),
1561 'playlist_uploader': ie_result.get('uploader'),
1562 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1563 'extractor': ie_result['extractor'],
1564 'webpage_url': ie_result['webpage_url'],
1565 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1566 'extractor_key': ie_result['extractor_key'],
1567 }
1568
1569 if self._match_entry(entry, incomplete=True) is not None:
1570 continue
1571
1572 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1573 if not entry_result:
1574 failures += 1
1575 if failures >= max_failures:
1576 self.report_error(
1577 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1578 break
30a074c2 1579 # TODO: skip failed (empty) entries?
1580 playlist_results.append(entry_result)
1581 ie_result['entries'] = playlist_results
1582 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1583 return ie_result
1584
a0566bbf 1585 @__handle_extraction_exceptions
1586 def __process_iterable_entry(self, entry, download, extra_info):
1587 return self.process_ie_result(
1588 entry, download=download, extra_info=extra_info)
1589
67134eab
JMF
1590 def _build_format_filter(self, filter_spec):
1591 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1592
1593 OPERATORS = {
1594 '<': operator.lt,
1595 '<=': operator.le,
1596 '>': operator.gt,
1597 '>=': operator.ge,
1598 '=': operator.eq,
1599 '!=': operator.ne,
1600 }
67134eab 1601 operator_rex = re.compile(r'''(?x)\s*
187986a8 1602 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1603 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1604 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1605 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1606 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1607 if m:
1608 try:
1609 comparison_value = int(m.group('value'))
1610 except ValueError:
1611 comparison_value = parse_filesize(m.group('value'))
1612 if comparison_value is None:
1613 comparison_value = parse_filesize(m.group('value') + 'B')
1614 if comparison_value is None:
1615 raise ValueError(
1616 'Invalid value %r in format specification %r' % (
67134eab 1617 m.group('value'), filter_spec))
9ddb6925
S
1618 op = OPERATORS[m.group('op')]
1619
083c9df9 1620 if not m:
9ddb6925
S
1621 STR_OPERATORS = {
1622 '=': operator.eq,
10d33b34
YCH
1623 '^=': lambda attr, value: attr.startswith(value),
1624 '$=': lambda attr, value: attr.endswith(value),
1625 '*=': lambda attr, value: value in attr,
9ddb6925 1626 }
187986a8 1627 str_operator_rex = re.compile(r'''(?x)\s*
1628 (?P<key>[a-zA-Z0-9._-]+)\s*
1629 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1630 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1631 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1632 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1633 if m:
1634 comparison_value = m.group('value')
2cc779f4
S
1635 str_op = STR_OPERATORS[m.group('op')]
1636 if m.group('negation'):
e118a879 1637 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1638 else:
1639 op = str_op
083c9df9 1640
9ddb6925 1641 if not m:
187986a8 1642 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1643
1644 def _filter(f):
1645 actual_value = f.get(m.group('key'))
1646 if actual_value is None:
1647 return m.group('none_inclusive')
1648 return op(actual_value, comparison_value)
67134eab
JMF
1649 return _filter
1650
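# Illustrative sketch (not from the original module): the returned predicate can
# be applied directly to a formats list, e.g. for the spec 'height<=720':
#
#     keep = self._build_format_filter('height<=720')
#     small_formats = [f for f in formats if keep(f)]
#
# Formats that lack the key are dropped unless the spec carries the '?' suffix
# ('height<=?720'), in which case they are kept.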
0017d9ad 1651 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1652
af0f7428
S
1653 def can_merge():
1654 merger = FFmpegMergerPP(self)
1655 return merger.available and merger.can_merge()
1656
91ebc640 1657 prefer_best = (
b7b04c78 1658 not self.params.get('simulate')
91ebc640 1659 and download
1660 and (
1661 not can_merge()
19807826 1662 or info_dict.get('is_live', False)
de6000d9 1663 or self.outtmpl_dict['default'] == '-'))
53ed7066 1664 compat = (
1665 prefer_best
1666 or self.params.get('allow_multiple_audio_streams', False)
1667 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1668
1669 return (
53ed7066 1670 'best/bestvideo+bestaudio' if prefer_best
1671 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1672 else 'bestvideo+bestaudio/best')
0017d9ad 1673
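# Rough editorial summary (assuming ffmpeg with merge support is installed and
# no compat options are set): _default_format_spec resolves to
#
#     'bestvideo*+bestaudio/best'   for a normal download,
#     'best/bestvideo+bestaudio'    when merging is impossible, the stream is
#                                   live, or the output template is '-' (stdout).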
67134eab
JMF
1674 def build_format_selector(self, format_spec):
1675 def syntax_error(note, start):
1676 message = (
1677 'Invalid format specification: '
1678 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1679 return SyntaxError(message)
1680
1681 PICKFIRST = 'PICKFIRST'
1682 MERGE = 'MERGE'
1683 SINGLE = 'SINGLE'
0130afb7 1684 GROUP = 'GROUP'
67134eab
JMF
1685 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1686
91ebc640 1687 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1688 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1689
e8e73840 1690 check_formats = self.params.get('check_formats')
1691
67134eab
JMF
1692 def _parse_filter(tokens):
1693 filter_parts = []
1694 for type, string, start, _, _ in tokens:
1695 if type == tokenize.OP and string == ']':
1696 return ''.join(filter_parts)
1697 else:
1698 filter_parts.append(string)
1699
232541df 1700 def _remove_unused_ops(tokens):
17cc1534 1701 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1702 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1703 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1704 last_string, last_start, last_end, last_line = None, None, None, None
1705 for type, string, start, end, line in tokens:
1706 if type == tokenize.OP and string == '[':
1707 if last_string:
1708 yield tokenize.NAME, last_string, last_start, last_end, last_line
1709 last_string = None
1710 yield type, string, start, end, line
1711 # everything inside brackets will be handled by _parse_filter
1712 for type, string, start, end, line in tokens:
1713 yield type, string, start, end, line
1714 if type == tokenize.OP and string == ']':
1715 break
1716 elif type == tokenize.OP and string in ALLOWED_OPS:
1717 if last_string:
1718 yield tokenize.NAME, last_string, last_start, last_end, last_line
1719 last_string = None
1720 yield type, string, start, end, line
1721 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1722 if not last_string:
1723 last_string = string
1724 last_start = start
1725 last_end = end
1726 else:
1727 last_string += string
1728 if last_string:
1729 yield tokenize.NAME, last_string, last_start, last_end, last_line
1730
cf2ac6df 1731 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1732 selectors = []
1733 current_selector = None
1734 for type, string, start, _, _ in tokens:
1735 # ENCODING is only defined in python 3.x
1736 if type == getattr(tokenize, 'ENCODING', None):
1737 continue
1738 elif type in [tokenize.NAME, tokenize.NUMBER]:
1739 current_selector = FormatSelector(SINGLE, string, [])
1740 elif type == tokenize.OP:
cf2ac6df
JMF
1741 if string == ')':
1742 if not inside_group:
1743 # ')' will be handled by the parentheses group
1744 tokens.restore_last_token()
67134eab 1745 break
cf2ac6df 1746 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1747 tokens.restore_last_token()
1748 break
cf2ac6df
JMF
1749 elif inside_choice and string == ',':
1750 tokens.restore_last_token()
1751 break
1752 elif string == ',':
0a31a350
JMF
1753 if not current_selector:
1754 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1755 selectors.append(current_selector)
1756 current_selector = None
1757 elif string == '/':
d96d604e
JMF
1758 if not current_selector:
1759 raise syntax_error('"/" must follow a format selector', start)
67134eab 1760 first_choice = current_selector
cf2ac6df 1761 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1762 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1763 elif string == '[':
1764 if not current_selector:
1765 current_selector = FormatSelector(SINGLE, 'best', [])
1766 format_filter = _parse_filter(tokens)
1767 current_selector.filters.append(format_filter)
0130afb7
JMF
1768 elif string == '(':
1769 if current_selector:
1770 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1771 group = _parse_format_selection(tokens, inside_group=True)
1772 current_selector = FormatSelector(GROUP, group, [])
67134eab 1773 elif string == '+':
d03cfdce 1774 if not current_selector:
1775 raise syntax_error('Unexpected "+"', start)
1776 selector_1 = current_selector
1777 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1778 if not selector_2:
1779 raise syntax_error('Expected a selector', start)
1780 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1781 else:
1782 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1783 elif type == tokenize.ENDMARKER:
1784 break
1785 if current_selector:
1786 selectors.append(current_selector)
1787 return selectors
1788
f8d4ad9a 1789 def _merge(formats_pair):
1790 format_1, format_2 = formats_pair
1791
1792 formats_info = []
1793 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1794 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1795
1796 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1797 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1798 for (i, fmt_info) in enumerate(formats_info):
551f9388 1799 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1800 formats_info.pop(i)
1801 continue
1802 for aud_vid in ['audio', 'video']:
f8d4ad9a 1803 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1804 if get_no_more[aud_vid]:
1805 formats_info.pop(i)
f5510afe 1806 break
f8d4ad9a 1807 get_no_more[aud_vid] = True
1808
1809 if len(formats_info) == 1:
1810 return formats_info[0]
1811
1812 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1813 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1814
1815 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1816 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1817
1818 output_ext = self.params.get('merge_output_format')
1819 if not output_ext:
1820 if the_only_video:
1821 output_ext = the_only_video['ext']
1822 elif the_only_audio and not video_fmts:
1823 output_ext = the_only_audio['ext']
1824 else:
1825 output_ext = 'mkv'
1826
1827 new_dict = {
1828 'requested_formats': formats_info,
1829 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1830 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1831 'ext': output_ext,
1832 }
1833
1834 if the_only_video:
1835 new_dict.update({
1836 'width': the_only_video.get('width'),
1837 'height': the_only_video.get('height'),
1838 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1839 'fps': the_only_video.get('fps'),
1840 'vcodec': the_only_video.get('vcodec'),
1841 'vbr': the_only_video.get('vbr'),
1842 'stretched_ratio': the_only_video.get('stretched_ratio'),
1843 })
1844
1845 if the_only_audio:
1846 new_dict.update({
1847 'acodec': the_only_audio.get('acodec'),
1848 'abr': the_only_audio.get('abr'),
1849 })
1850
1851 return new_dict
1852
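# Illustrative note (not from the original module): merging one video-only and
# one audio-only format keeps the video container unless --merge-output-format
# is given; e.g. a vp9/webm video merged with an opus audio track yields a dict
# with 'ext': 'webm', 'requested_formats': [video, audio] and a combined
# 'format_id' such as '248+251' (hypothetical ids).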
e8e73840 1853 def _check_formats(formats):
981052c9 1854 if not check_formats:
1855 yield from formats
b5ac45b1 1856 return
e8e73840 1857 for f in formats:
1858 self.to_screen('[info] Testing format %s' % f['format_id'])
21cd8fae 1859 temp_file = tempfile.NamedTemporaryFile(
1860 suffix='.tmp', delete=False,
1861 dir=self.get_output_path('temp') or None)
1862 temp_file.close()
fe346461 1863 try:
981052c9 1864 success, _ = self.dl(temp_file.name, f, test=True)
1865 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1866 success = False
fe346461 1867 finally:
21cd8fae 1868 if os.path.exists(temp_file.name):
1869 try:
1870 os.remove(temp_file.name)
1871 except OSError:
1872 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
981052c9 1873 if success:
e8e73840 1874 yield f
1875 else:
1876 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1877
67134eab 1878 def _build_selector_function(selector):
909d24dd 1879 if isinstance(selector, list): # ,
67134eab
JMF
1880 fs = [_build_selector_function(s) for s in selector]
1881
317f7ab6 1882 def selector_function(ctx):
67134eab 1883 for f in fs:
981052c9 1884 yield from f(ctx)
67134eab 1885 return selector_function
909d24dd 1886
1887 elif selector.type == GROUP: # ()
0130afb7 1888 selector_function = _build_selector_function(selector.selector)
909d24dd 1889
1890 elif selector.type == PICKFIRST: # /
67134eab
JMF
1891 fs = [_build_selector_function(s) for s in selector.selector]
1892
317f7ab6 1893 def selector_function(ctx):
67134eab 1894 for f in fs:
317f7ab6 1895 picked_formats = list(f(ctx))
67134eab
JMF
1896 if picked_formats:
1897 return picked_formats
1898 return []
67134eab 1899
981052c9 1900 elif selector.type == MERGE: # +
1901 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1902
1903 def selector_function(ctx):
1904 for pair in itertools.product(
1905 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1906 yield _merge(pair)
1907
909d24dd 1908 elif selector.type == SINGLE: # atom
598d185d 1909 format_spec = selector.selector or 'best'
909d24dd 1910
f8d4ad9a 1911 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1912 if format_spec == 'all':
1913 def selector_function(ctx):
981052c9 1914 yield from _check_formats(ctx['formats'])
f8d4ad9a 1915 elif format_spec == 'mergeall':
1916 def selector_function(ctx):
981052c9 1917 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1918 if not formats:
1919 return
921b76ca 1920 merged_format = formats[-1]
1921 for f in formats[-2::-1]:
f8d4ad9a 1922 merged_format = _merge((merged_format, f))
1923 yield merged_format
909d24dd 1924
1925 else:
e8e73840 1926 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1927 mobj = re.match(
1928 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1929 format_spec)
1930 if mobj is not None:
1931 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1932 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1933 format_type = (mobj.group('type') or [None])[0]
1934 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1935 format_modified = mobj.group('mod') is not None
909d24dd 1936
1937 format_fallback = not format_type and not format_modified # for b, w
8326b00a 1938 _filter_f = (
eff63539 1939 (lambda f: f.get('%scodec' % format_type) != 'none')
1940 if format_type and format_modified # bv*, ba*, wv*, wa*
1941 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1942 if format_type # bv, ba, wv, wa
1943 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1944 if not format_modified # b, w
8326b00a 1945 else lambda f: True) # b*, w*
1946 filter_f = lambda f: _filter_f(f) and (
1947 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 1948 else:
909d24dd 1949 filter_f = ((lambda f: f.get('ext') == format_spec)
1950 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1951 else (lambda f: f.get('format_id') == format_spec)) # id
1952
1953 def selector_function(ctx):
1954 formats = list(ctx['formats'])
909d24dd 1955 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1956 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1957 # for extractors with incomplete formats (audio only (soundcloud)
1958 # or video only (imgur)) best/worst will fallback to
1959 # best/worst {video,audio}-only format
e8e73840 1960 matches = formats
981052c9 1961 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1962 try:
e8e73840 1963 yield matches[format_idx - 1]
981052c9 1964 except IndexError:
1965 return
083c9df9 1966
67134eab 1967 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1968
317f7ab6
S
1969 def final_selector(ctx):
1970 ctx_copy = copy.deepcopy(ctx)
67134eab 1971 for _filter in filters:
317f7ab6
S
1972 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1973 return selector_function(ctx_copy)
67134eab 1974 return final_selector
083c9df9 1975
67134eab 1976 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1977 try:
232541df 1978 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1979 except tokenize.TokenError:
1980 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1981
1982 class TokenIterator(object):
1983 def __init__(self, tokens):
1984 self.tokens = tokens
1985 self.counter = 0
1986
1987 def __iter__(self):
1988 return self
1989
1990 def __next__(self):
1991 if self.counter >= len(self.tokens):
1992 raise StopIteration()
1993 value = self.tokens[self.counter]
1994 self.counter += 1
1995 return value
1996
1997 next = __next__
1998
1999 def restore_last_token(self):
2000 self.counter -= 1
2001
2002 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2003 return _build_selector_function(parsed_selector)
a9c58ad9 2004
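# Illustrative sketch (not from the original module): the compiled selector is a
# generator over a context dict, e.g.
#
#     selector = self.build_format_selector('bestvideo[height<=720]+bestaudio/best')
#     chosen = list(selector({'formats': formats, 'incomplete_formats': False}))
#
# Each yielded item is either a single format dict or a merged dict carrying
# 'requested_formats', which is exactly what process_video_result consumes.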
e5660ee6
JMF
2005 def _calc_headers(self, info_dict):
2006 res = std_headers.copy()
2007
2008 add_headers = info_dict.get('http_headers')
2009 if add_headers:
2010 res.update(add_headers)
2011
2012 cookies = self._calc_cookies(info_dict)
2013 if cookies:
2014 res['Cookie'] = cookies
2015
0016b84e
S
2016 if 'X-Forwarded-For' not in res:
2017 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2018 if x_forwarded_for_ip:
2019 res['X-Forwarded-For'] = x_forwarded_for_ip
2020
e5660ee6
JMF
2021 return res
2022
2023 def _calc_cookies(self, info_dict):
5c2266df 2024 pr = sanitized_Request(info_dict['url'])
e5660ee6 2025 self.cookiejar.add_cookie_header(pr)
662435f7 2026 return pr.get_header('Cookie')
e5660ee6 2027
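# Illustrative example (not from the original module): per-format HTTP headers
# are std_headers overlaid with the extractor-supplied 'http_headers', plus a
# 'Cookie' header if the cookiejar matches the URL and an 'X-Forwarded-For'
# value when geo-bypass picked one:
#
#     headers = self._calc_headers({
#         'url': 'https://example.com/video.mp4',          # hypothetical URL
#         'http_headers': {'Referer': 'https://example.com/'},
#     })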
b0249bca 2028 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2029 thumbnails = info_dict.get('thumbnails')
2030 if thumbnails is None:
2031 thumbnail = info_dict.get('thumbnail')
2032 if thumbnail:
2033 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2034 if thumbnails:
2035 thumbnails.sort(key=lambda t: (
2036 t.get('preference') if t.get('preference') is not None else -1,
2037 t.get('width') if t.get('width') is not None else -1,
2038 t.get('height') if t.get('height') is not None else -1,
2039 t.get('id') if t.get('id') is not None else '',
2040 t.get('url')))
b0249bca 2041
0ba692ac 2042 def thumbnail_tester():
2043 if self.params.get('check_formats'):
cca80fe6 2044 test_all = True
2045 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 2046 else:
cca80fe6 2047 test_all = False
0ba692ac 2048 to_screen = self.write_debug
2049
2050 def test_thumbnail(t):
cca80fe6 2051 if not test_all and not t.get('_test_url'):
2052 return True
0ba692ac 2053 to_screen('Testing thumbnail %s' % t['id'])
2054 try:
2055 self.urlopen(HEADRequest(t['url']))
2056 except network_exceptions as err:
2057 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2058 t['id'], t['url'], error_to_compat_str(err)))
2059 return False
2060 return True
2061
2062 return test_thumbnail
b0249bca 2063
bc516a3f 2064 for i, t in enumerate(thumbnails):
bc516a3f 2065 if t.get('id') is None:
2066 t['id'] = '%d' % i
b0249bca 2067 if t.get('width') and t.get('height'):
2068 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2069 t['url'] = sanitize_url(t['url'])
0ba692ac 2070
2071 if self.params.get('check_formats') is not False:
2072 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2073 else:
2074 info_dict['thumbnails'] = thumbnails
bc516a3f 2075
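# Illustrative example (not from the original module): a bare 'thumbnail' URL is
# promoted to a one-element 'thumbnails' list, which is then sorted and given
# ids and 'WxH' resolution strings where possible:
#
#     info = {'thumbnail': 'https://example.com/t.jpg'}    # hypothetical URL
#     self._sanitize_thumbnails(info)
#     # info['thumbnails'] now holds [{'url': 'https://example.com/t.jpg', 'id': '0'}]
#     # (wrapped in a LazyList unless check_formats is explicitly disabled)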
dd82ffea
JMF
2076 def process_video_result(self, info_dict, download=True):
2077 assert info_dict.get('_type', 'video') == 'video'
2078
bec1fad2
PH
2079 if 'id' not in info_dict:
2080 raise ExtractorError('Missing "id" field in extractor result')
2081 if 'title' not in info_dict:
1151c407 2082 raise ExtractorError('Missing "title" field in extractor result',
2083 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2084
c9969434
S
2085 def report_force_conversion(field, field_not, conversion):
2086 self.report_warning(
2087 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2088 % (field, field_not, conversion))
2089
2090 def sanitize_string_field(info, string_field):
2091 field = info.get(string_field)
2092 if field is None or isinstance(field, compat_str):
2093 return
2094 report_force_conversion(string_field, 'a string', 'string')
2095 info[string_field] = compat_str(field)
2096
2097 def sanitize_numeric_fields(info):
2098 for numeric_field in self._NUMERIC_FIELDS:
2099 field = info.get(numeric_field)
2100 if field is None or isinstance(field, compat_numeric_types):
2101 continue
2102 report_force_conversion(numeric_field, 'numeric', 'int')
2103 info[numeric_field] = int_or_none(field)
2104
2105 sanitize_string_field(info_dict, 'id')
2106 sanitize_numeric_fields(info_dict)
be6217b2 2107
dd82ffea
JMF
2108 if 'playlist' not in info_dict:
2109 # It isn't part of a playlist
2110 info_dict['playlist'] = None
2111 info_dict['playlist_index'] = None
2112
bc516a3f 2113 self._sanitize_thumbnails(info_dict)
d5519808 2114
536a55da 2115 thumbnail = info_dict.get('thumbnail')
bc516a3f 2116 thumbnails = info_dict.get('thumbnails')
536a55da
S
2117 if thumbnail:
2118 info_dict['thumbnail'] = sanitize_url(thumbnail)
2119 elif thumbnails:
d5519808
PH
2120 info_dict['thumbnail'] = thumbnails[-1]['url']
2121
ae30b840 2122 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2123 info_dict['display_id'] = info_dict['id']
2124
10db0d2f 2125 for ts_key, date_key in (
2126 ('timestamp', 'upload_date'),
2127 ('release_timestamp', 'release_date'),
2128 ):
2129 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2130 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2131 # see http://bugs.python.org/issue1646728)
2132 try:
2133 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2134 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2135 except (ValueError, OverflowError, OSError):
2136 pass
9d2ecdbc 2137
ae30b840 2138 live_keys = ('is_live', 'was_live')
2139 live_status = info_dict.get('live_status')
2140 if live_status is None:
2141 for key in live_keys:
2142 if info_dict.get(key) is False:
2143 continue
2144 if info_dict.get(key):
2145 live_status = key
2146 break
2147 if all(info_dict.get(key) is False for key in live_keys):
2148 live_status = 'not_live'
2149 if live_status:
2150 info_dict['live_status'] = live_status
2151 for key in live_keys:
2152 if info_dict.get(key) is None:
2153 info_dict[key] = (live_status == key)
2154
33d2fc2f
S
2155 # Auto generate title fields corresponding to the *_number fields when missing
2156 # in order to always have clean titles. This is very common for TV series.
2157 for field in ('chapter', 'season', 'episode'):
2158 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2159 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2160
05108a49
S
2161 for cc_kind in ('subtitles', 'automatic_captions'):
2162 cc = info_dict.get(cc_kind)
2163 if cc:
2164 for _, subtitle in cc.items():
2165 for subtitle_format in subtitle:
2166 if subtitle_format.get('url'):
2167 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2168 if subtitle_format.get('ext') is None:
2169 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2170
2171 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2172 subtitles = info_dict.get('subtitles')
4bba3716 2173
360e1ca5 2174 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2175 info_dict['id'], subtitles, automatic_captions)
a504ced0 2176
dd82ffea
JMF
2177 # We now pick which formats have to be downloaded
2178 if info_dict.get('formats') is None:
2179 # There's only one format available
2180 formats = [info_dict]
2181 else:
2182 formats = info_dict['formats']
2183
e0493e90 2184 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2185 if not self.params.get('allow_unplayable_formats'):
2186 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2187
db95dc13 2188 if not formats:
1151c407 2189 self.raise_no_formats(info_dict)
db95dc13 2190
73af5cc8
S
2191 def is_wellformed(f):
2192 url = f.get('url')
a5ac0c47 2193 if not url:
73af5cc8
S
2194 self.report_warning(
2195 '"url" field is missing or empty - skipping format, '
2196 'there is an error in extractor')
a5ac0c47
S
2197 return False
2198 if isinstance(url, bytes):
2199 sanitize_string_field(f, 'url')
2200 return True
73af5cc8
S
2201
2202 # Filter out malformed formats for better extraction robustness
2203 formats = list(filter(is_wellformed, formats))
2204
181c7053
S
2205 formats_dict = {}
2206
dd82ffea 2207 # We check that all the formats have the format and format_id fields
db95dc13 2208 for i, format in enumerate(formats):
c9969434
S
2209 sanitize_string_field(format, 'format_id')
2210 sanitize_numeric_fields(format)
dcf77cf1 2211 format['url'] = sanitize_url(format['url'])
e74e3b63 2212 if not format.get('format_id'):
8016c922 2213 format['format_id'] = compat_str(i)
e2effb08
S
2214 else:
2215 # Sanitize format_id from characters used in format selector expression
ec85ded8 2216 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2217 format_id = format['format_id']
2218 if format_id not in formats_dict:
2219 formats_dict[format_id] = []
2220 formats_dict[format_id].append(format)
2221
2222 # Make sure all formats have unique format_id
2223 for format_id, ambiguous_formats in formats_dict.items():
2224 if len(ambiguous_formats) > 1:
2225 for i, format in enumerate(ambiguous_formats):
2226 format['format_id'] = '%s-%d' % (format_id, i)
2227
2228 for i, format in enumerate(formats):
8c51aa65 2229 if format.get('format') is None:
6febd1c1 2230 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2231 id=format['format_id'],
2232 res=self.format_resolution(format),
b868936c 2233 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2234 )
c1002e96 2235 # Automatically determine file extension if missing
5b1d8575 2236 if format.get('ext') is None:
cce929ea 2237 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2238 # Automatically determine protocol if missing (useful for format
2239 # selection purposes)
6f0be937 2240 if format.get('protocol') is None:
b5559424 2241 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2242 # Add HTTP headers, so that external programs can use them from the
2243 # json output
2244 full_format_info = info_dict.copy()
2245 full_format_info.update(format)
2246 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2247 # Remove private housekeeping stuff
2248 if '__x_forwarded_for_ip' in info_dict:
2249 del info_dict['__x_forwarded_for_ip']
dd82ffea 2250
4bcc7bd1 2251 # TODO Central sorting goes here
99e206d5 2252
88acdbc2 2253 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2254 # only set the 'formats' field if the original info_dict lists them;
2255 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2256 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2257 # which can't be exported to json
b3d9ef88 2258 info_dict['formats'] = formats
4ec82a72 2259
2260 info_dict, _ = self.pre_process(info_dict)
2261
b7b04c78 2262 if self.params.get('list_thumbnails'):
2263 self.list_thumbnails(info_dict)
2264 if self.params.get('listformats'):
86c66b2d 2265 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2266 self.to_screen('%s has no formats' % info_dict['id'])
2267 else:
2268 self.list_formats(info_dict)
b7b04c78 2269 if self.params.get('listsubtitles'):
2270 if 'automatic_captions' in info_dict:
2271 self.list_subtitles(
2272 info_dict['id'], automatic_captions, 'automatic captions')
2273 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2274 list_only = self.params.get('simulate') is None and (
2275 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2276 if list_only:
b7b04c78 2277 # Without this printing, -F --print-json will not work
169dbde9 2278 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2279 return
2280
187986a8 2281 format_selector = self.format_selector
2282 if format_selector is None:
0017d9ad 2283 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2284 self.write_debug('Default format spec: %s' % req_format)
187986a8 2285 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2286
2287 # While in format selection we may need to have an access to the original
2288 # format set in order to calculate some metrics or do some processing.
2289 # For now we need to be able to guess whether original formats provided
2290 # by extractor are incomplete or not (i.e. whether extractor provides only
2291 # video-only or audio-only formats) for proper formats selection for
2292 # extractors with such incomplete formats (see
067aa17e 2293 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2294 # Since formats may be filtered during format selection and may not match
2295 # the original formats the results may be incorrect. Thus original formats
2296 # or pre-calculated metrics should be passed to format selection routines
2297 # as well.
2298 # We will pass a context object containing all necessary additional data
2299 # instead of just formats.
2300 # This fixes incorrect format selection issue (see
067aa17e 2301 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2302 incomplete_formats = (
317f7ab6 2303 # All formats are video-only or
3089bc74 2304 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2305 # all formats are audio-only
3089bc74 2306 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2307
2308 ctx = {
2309 'formats': formats,
2310 'incomplete_formats': incomplete_formats,
2311 }
2312
2313 formats_to_download = list(format_selector(ctx))
dd82ffea 2314 if not formats_to_download:
b7da73eb 2315 if not self.params.get('ignore_no_formats_error'):
1151c407 2316 raise ExtractorError('Requested format is not available', expected=True,
2317 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2318 else:
2319 self.report_warning('Requested format is not available')
4513a41a
A
2320 # Process what we can, even without any available formats.
2321 self.process_info(dict(info_dict))
b7da73eb 2322 elif download:
2323 self.to_screen(
07cce701 2324 '[info] %s: Downloading %d format(s): %s' % (
2325 info_dict['id'], len(formats_to_download),
2326 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2327 for fmt in formats_to_download:
dd82ffea 2328 new_info = dict(info_dict)
4ec82a72 2329 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2330 new_info['__original_infodict'] = info_dict
b7da73eb 2331 new_info.update(fmt)
dd82ffea
JMF
2332 self.process_info(new_info)
2333 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2334 if formats_to_download:
2335 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2336 return info_dict
2337
98c70d6f 2338 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2339 """Select the requested subtitles and their format"""
98c70d6f
JMF
2340 available_subs = {}
2341 if normal_subtitles and self.params.get('writesubtitles'):
2342 available_subs.update(normal_subtitles)
2343 if automatic_captions and self.params.get('writeautomaticsub'):
2344 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2345 if lang not in available_subs:
2346 available_subs[lang] = cap_info
2347
4d171848
JMF
2348 if (not self.params.get('writesubtitles') and not
2349 self.params.get('writeautomaticsub') or not
2350 available_subs):
2351 return None
a504ced0 2352
c32b0aab 2353 all_sub_langs = available_subs.keys()
a504ced0 2354 if self.params.get('allsubtitles', False):
c32b0aab 2355 requested_langs = all_sub_langs
2356 elif self.params.get('subtitleslangs', False):
2357 requested_langs = set()
2358 for lang in self.params.get('subtitleslangs'):
2359 if lang == 'all':
2360 requested_langs.update(all_sub_langs)
2361 continue
2362 discard = lang[0] == '-'
2363 if discard:
2364 lang = lang[1:]
2365 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2366 if discard:
2367 for lang in current_langs:
2368 requested_langs.discard(lang)
2369 else:
2370 requested_langs.update(current_langs)
2371 elif 'en' in available_subs:
2372 requested_langs = ['en']
a504ced0 2373 else:
c32b0aab 2374 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2375 if requested_langs:
2376 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2377
2378 formats_query = self.params.get('subtitlesformat', 'best')
2379 formats_preference = formats_query.split('/') if formats_query else []
2380 subs = {}
2381 for lang in requested_langs:
2382 formats = available_subs.get(lang)
2383 if formats is None:
2384 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2385 continue
a504ced0
JMF
2386 for ext in formats_preference:
2387 if ext == 'best':
2388 f = formats[-1]
2389 break
2390 matches = list(filter(lambda f: f['ext'] == ext, formats))
2391 if matches:
2392 f = matches[-1]
2393 break
2394 else:
2395 f = formats[-1]
2396 self.report_warning(
2397 'No subtitle format found matching "%s" for language %s, '
2398 'using %s' % (formats_query, lang, f['ext']))
2399 subs[lang] = f
2400 return subs
2401
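# Illustrative example (not from the original module): with
# --sub-langs 'en.*,-en-GB' and --sub-format 'vtt/best', every language matching
# 'en.*' is requested, 'en-GB' is discarded again, and per language the first
# available format in ['vtt', 'best'] wins:
#
#     subs = self.process_subtitles('abc123', normal_subs, auto_captions)
#     # e.g. {'en': {'ext': 'vtt', ...}, 'en-US': {'ext': 'vtt', ...}}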
d06daf23 2402 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2403 def print_mandatory(field, actual_field=None):
2404 if actual_field is None:
2405 actual_field = field
d06daf23 2406 if (self.params.get('force%s' % field, False)
53c18592 2407 and (not incomplete or info_dict.get(actual_field) is not None)):
2408 self.to_stdout(info_dict[actual_field])
d06daf23
S
2409
2410 def print_optional(field):
2411 if (self.params.get('force%s' % field, False)
2412 and info_dict.get(field) is not None):
2413 self.to_stdout(info_dict[field])
2414
53c18592 2415 info_dict = info_dict.copy()
2416 if filename is not None:
2417 info_dict['filename'] = filename
2418 if info_dict.get('requested_formats') is not None:
2419 # For RTMP URLs, also include the playpath
2420 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2421 elif 'url' in info_dict:
2422 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2423
2b8a2973 2424 if self.params.get('forceprint') or self.params.get('forcejson'):
2425 self.post_extract(info_dict)
53c18592 2426 for tmpl in self.params.get('forceprint', []):
2427 if re.match(r'\w+$', tmpl):
2428 tmpl = '%({})s'.format(tmpl)
2429 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
901130bb 2430 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
53c18592 2431
d06daf23
S
2432 print_mandatory('title')
2433 print_mandatory('id')
53c18592 2434 print_mandatory('url', 'urls')
d06daf23
S
2435 print_optional('thumbnail')
2436 print_optional('description')
53c18592 2437 print_optional('filename')
b868936c 2438 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2439 self.to_stdout(formatSeconds(info_dict['duration']))
2440 print_mandatory('format')
53c18592 2441
2b8a2973 2442 if self.params.get('forcejson'):
6e84b215 2443 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2444
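# Illustrative note (not from the original module): --print accepts either a
# bare field name or a full output template; a bare name such as 'title' is
# rewritten to '%(title)s' before being expanded through prepare_outtmpl, so
# `--print title` and `--print '%(title)s'` emit the same line per video.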
e8e73840 2445 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2446 if not info.get('url'):
1151c407 2447 self.raise_no_formats(info, True)
e8e73840 2448
2449 if test:
2450 verbose = self.params.get('verbose')
2451 params = {
2452 'test': True,
2453 'quiet': not verbose,
2454 'verbose': verbose,
2455 'noprogress': not verbose,
2456 'nopart': True,
2457 'skip_unavailable_fragments': False,
2458 'keep_fragments': False,
2459 'overwrites': True,
2460 '_no_ytdl_file': True,
2461 }
2462 else:
2463 params = self.params
96fccc10 2464 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2465 if not test:
2466 for ph in self._progress_hooks:
2467 fd.add_progress_hook(ph)
18e674b4 2468 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2469 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2470 new_info = dict(info)
2471 if new_info.get('http_headers') is None:
2472 new_info['http_headers'] = self._calc_headers(new_info)
2473 return fd.download(name, new_info, subtitle)
2474
8222d8de
JMF
2475 def process_info(self, info_dict):
2476 """Process a single resolved IE result."""
2477
2478 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
2479
2480 max_downloads = self.params.get('max_downloads')
2481 if max_downloads is not None:
2482 if self._num_downloads >= int(max_downloads):
2483 raise MaxDownloadsReached()
8222d8de 2484
d06daf23 2485 # TODO: backward compatibility, to be removed
8222d8de 2486 info_dict['fulltitle'] = info_dict['title']
8222d8de 2487
4513a41a 2488 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2489 info_dict['format'] = info_dict['ext']
2490
c77495e3 2491 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2492 return
2493
277d6ff5 2494 self.post_extract(info_dict)
fd288278 2495 self._num_downloads += 1
8222d8de 2496
dcf64d43 2497 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2498 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2499 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2500 files_to_move = {}
8222d8de
JMF
2501
2502 # Forced printings
4513a41a 2503 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2504
b7b04c78 2505 if self.params.get('simulate'):
2d30509f 2506 if self.params.get('force_write_download_archive', False):
2507 self.record_download_archive(info_dict)
2508
2509 # Do nothing else if in simulate mode
8222d8de
JMF
2510 return
2511
de6000d9 2512 if full_filename is None:
8222d8de
JMF
2513 return
2514
e92caff5 2515 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2516 return
e92caff5 2517 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2518 return
2519
2520 if self.params.get('writedescription', False):
de6000d9 2521 descfn = self.prepare_filename(info_dict, 'description')
e92caff5 2522 if not self._ensure_dir_exists(encodeFilename(descfn)):
0202b52a 2523 return
0c3d0f51 2524 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2525 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2526 elif info_dict.get('description') is None:
2527 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2528 else:
2529 try:
6febd1c1 2530 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2531 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2532 descfile.write(info_dict['description'])
7b6fefc9 2533 except (OSError, IOError):
6febd1c1 2534 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2535 return
8222d8de 2536
1fb07d10 2537 if self.params.get('writeannotations', False):
de6000d9 2538 annofn = self.prepare_filename(info_dict, 'annotation')
e92caff5 2539 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2540 return
0c3d0f51 2541 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2542 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2543 elif not info_dict.get('annotations'):
2544 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2545 else:
2546 try:
6febd1c1 2547 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2548 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2549 annofile.write(info_dict['annotations'])
2550 except (KeyError, TypeError):
6febd1c1 2551 self.report_warning('There are no annotations to write.')
7b6fefc9 2552 except (OSError, IOError):
6febd1c1 2553 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2554 return
1fb07d10 2555
c4a91be7 2556 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2557 self.params.get('writeautomaticsub')])
c4a91be7 2558
c84dd8a9 2559 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2560 # subtitle download errors are already handled in the relevant IE,
2561 # so processing silently continues when the IE does not support them
c84dd8a9 2562 subtitles = info_dict['requested_subtitles']
fa57af1e 2563 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2564 for sub_lang, sub_info in subtitles.items():
2565 sub_format = sub_info['ext']
56d868db 2566 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2567 sub_filename_final = subtitles_filename(
2568 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2569 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2570 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
dcf64d43 2571 sub_info['filepath'] = sub_filename
0202b52a 2572 files_to_move[sub_filename] = sub_filename_final
a504ced0 2573 else:
0c9df79e 2574 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2575 if sub_info.get('data') is not None:
2576 try:
2577 # Use newline='' to prevent conversion of newline characters
067aa17e 2578 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2579 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2580 subfile.write(sub_info['data'])
dcf64d43 2581 sub_info['filepath'] = sub_filename
0202b52a 2582 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2583 except (OSError, IOError):
2584 self.report_error('Cannot write subtitles file ' + sub_filename)
2585 return
7b6fefc9 2586 else:
5ff1bc0c 2587 try:
e8e73840 2588 self.dl(sub_filename, sub_info.copy(), subtitle=True)
dcf64d43 2589 sub_info['filepath'] = sub_filename
0202b52a 2590 files_to_move[sub_filename] = sub_filename_final
fe346461 2591 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
5ff1bc0c
RA
2592 self.report_warning('Unable to download subtitle for "%s": %s' %
2593 (sub_lang, error_to_compat_str(err)))
2594 continue
8222d8de 2595
8222d8de 2596 if self.params.get('writeinfojson', False):
de6000d9 2597 infofn = self.prepare_filename(info_dict, 'infojson')
e92caff5 2598 if not self._ensure_dir_exists(encodeFilename(infofn)):
0202b52a 2599 return
0c3d0f51 2600 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2601 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2602 else:
66c935fb 2603 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2604 try:
8012d892 2605 write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
7b6fefc9 2606 except (OSError, IOError):
66c935fb 2607 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2608 return
de6000d9 2609 info_dict['__infojson_filename'] = infofn
8222d8de 2610
56d868db 2611 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2612 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2613 thumb_filename = replace_extension(
2614 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
dcf64d43 2615 files_to_move[thumb_filename_temp] = thumb_filename
8222d8de 2616
732044af 2617 # Write internet shortcut files
2618 url_link = webloc_link = desktop_link = False
2619 if self.params.get('writelink', False):
2620 if sys.platform == "darwin": # macOS.
2621 webloc_link = True
2622 elif sys.platform.startswith("linux"):
2623 desktop_link = True
2624 else: # if sys.platform in ['win32', 'cygwin']:
2625 url_link = True
2626 if self.params.get('writeurllink', False):
2627 url_link = True
2628 if self.params.get('writewebloclink', False):
2629 webloc_link = True
2630 if self.params.get('writedesktoplink', False):
2631 desktop_link = True
2632
2633 if url_link or webloc_link or desktop_link:
2634 if 'webpage_url' not in info_dict:
2635 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2636 return
2637 ascii_url = iri_to_uri(info_dict['webpage_url'])
2638
2639 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2640 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2641 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2642 self.to_screen('[info] Internet shortcut is already present')
2643 else:
2644 try:
2645 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2646 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2647 template_vars = {'url': ascii_url}
2648 if embed_filename:
2649 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2650 linkfile.write(template % template_vars)
2651 except (OSError, IOError):
2652 self.report_error('Cannot write internet shortcut ' + linkfn)
2653 return False
2654 return True
2655
2656 if url_link:
2657 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2658 return
2659 if webloc_link:
2660 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2661 return
2662 if desktop_link:
2663 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2664 return
2665
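Editorial aside: the 'writelink' family of options above picks the shortcut flavour per platform ('.webloc' on macOS, '.desktop' on Linux, '.url' elsewhere). A sketch of requesting one (placeholder URL; the exact template lives in utils.DOT_URL_LINK_TEMPLATE and may differ slightly):

from yt_dlp import YoutubeDL

with YoutubeDL({'skip_download': True, 'writelink': True}) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL

# On Windows/Cygwin this writes a "<title>.url" file that is roughly:
#   [InternetShortcut]
#   URL=https://example.com/some-video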
56d868db 2666 try:
2667 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2668 except PostProcessingError as err:
2669 self.report_error('Preprocessing: %s' % str(err))
2670 return
2671
732044af 2672 must_record_download_archive = False
56d868db 2673 if self.params.get('skip_download', False):
2674 info_dict['filepath'] = temp_filename
2675 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2676 info_dict['__files_to_move'] = files_to_move
2677 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2678 else:
2679 # Download
b868936c 2680 info_dict.setdefault('__postprocessors', [])
4340deca 2681 try:
0202b52a 2682
6b591b29 2683 def existing_file(*filepaths):
2684 ext = info_dict.get('ext')
2685 final_ext = self.params.get('final_ext', ext)
2686 existing_files = []
2687 for file in orderedSet(filepaths):
2688 if final_ext != ext:
2689 converted = replace_extension(file, final_ext, ext)
2690 if os.path.exists(encodeFilename(converted)):
2691 existing_files.append(converted)
2692 if os.path.exists(encodeFilename(file)):
2693 existing_files.append(file)
2694
2695 if not existing_files or self.params.get('overwrites', False):
2696 for file in orderedSet(existing_files):
2697 self.report_file_delete(file)
2698 os.remove(encodeFilename(file))
2699 return None
2700
6b591b29 2701 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2702 return existing_files[0]
0202b52a 2703
2704 success = True
4340deca 2705 if info_dict.get('requested_formats') is not None:
81cd954a
S
2706
2707 def compatible_formats(formats):
d03cfdce 2708 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2709 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2710 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2711 if len(video_formats) > 2 or len(audio_formats) > 2:
2712 return False
2713
81cd954a 2714 # Check extension
d03cfdce 2715 exts = set(format.get('ext') for format in formats)
2716 COMPATIBLE_EXTS = (
2717 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2718 set(('webm',)),
2719 )
2720 for ext_sets in COMPATIBLE_EXTS:
2721 if ext_sets.issuperset(exts):
2722 return True
81cd954a
S
2723 # TODO: Check acodec/vcodec
2724 return False
2725
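# Editorial note (not in the original source): for a typical video+audio pair such as
# {'ext': 'mp4', 'acodec': 'none'} plus {'ext': 'm4a', 'vcodec': 'none'}, exts is
# {'mp4', 'm4a'}, which the first COMPATIBLE_EXTS group is a superset of, so
# compatible_formats() returns True and the requested container is kept; pairing
# webm video with m4a audio matches neither group and triggers the mkv fallback below.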
2726 requested_formats = info_dict['requested_formats']
0202b52a 2727 old_ext = info_dict['ext']
3b297919 2728 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2729 info_dict['ext'] = 'mkv'
2730 self.report_warning(
2731 'Requested formats are incompatible for merge and will be merged into mkv.')
124bc071 2732 new_ext = info_dict['ext']
0202b52a 2733
124bc071 2734 def correct_ext(filename, ext=new_ext):
96fccc10 2735 if filename == '-':
2736 return filename
0202b52a 2737 filename_real_ext = os.path.splitext(filename)[1][1:]
2738 filename_wo_ext = (
2739 os.path.splitext(filename)[0]
124bc071 2740 if filename_real_ext in (old_ext, new_ext)
0202b52a 2741 else filename)
124bc071 2742 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2743
38c6902b 2744 # Ensure filename always has a correct extension for successful merge
0202b52a 2745 full_filename = correct_ext(full_filename)
2746 temp_filename = correct_ext(temp_filename)
2747 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2748 info_dict['__real_download'] = False
18e674b4 2749
2750 _protocols = set(determine_protocol(f) for f in requested_formats)
dbf5416a 2751 if len(_protocols) == 1: # All requested formats have same protocol
18e674b4 2752 info_dict['protocol'] = _protocols.pop()
dbf5416a 2753 directly_mergable = FFmpegFD.can_merge_formats(info_dict)
2754 if dl_filename is not None:
6c7274ec 2755 self.report_file_already_downloaded(dl_filename)
96fccc10 2756 elif (directly_mergable and get_suitable_downloader(
a46a815b 2757 info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
dbf5416a 2758 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2759 success, real_download = self.dl(temp_filename, info_dict)
2760 info_dict['__real_download'] = real_download
18e674b4 2761 else:
2762 downloaded = []
2763 merger = FFmpegMergerPP(self)
2764 if self.params.get('allow_unplayable_formats'):
2765 self.report_warning(
2766 'You have requested merging of multiple formats '
2767 'while also allowing unplayable formats to be downloaded. '
2768 'The formats won\'t be merged to prevent data corruption.')
2769 elif not merger.available:
2770 self.report_warning(
2771 'You have requested merging of multiple formats but ffmpeg is not installed. '
2772 'The formats won\'t be merged.')
2773
96fccc10 2774 if temp_filename == '-':
2775 reason = ('using a downloader other than ffmpeg' if directly_mergable
2776 else 'but the formats are incompatible for simultaneous download' if merger.available
2777 else 'but ffmpeg is not installed')
2778 self.report_warning(
2779 f'You have requested downloading multiple formats to stdout {reason}. '
2780 'The formats will be streamed one after the other')
2781 fname = temp_filename
dbf5416a 2782 for f in requested_formats:
2783 new_info = dict(info_dict)
2784 del new_info['requested_formats']
2785 new_info.update(f)
96fccc10 2786 if temp_filename != '-':
124bc071 2787 fname = prepend_extension(
2788 correct_ext(temp_filename, new_info['ext']),
2789 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2790 if not self._ensure_dir_exists(fname):
2791 return
2792 downloaded.append(fname)
dbf5416a 2793 partial_success, real_download = self.dl(fname, new_info)
2794 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2795 success = success and partial_success
2796 if merger.available and not self.params.get('allow_unplayable_formats'):
2797 info_dict['__postprocessors'].append(merger)
2798 info_dict['__files_to_merge'] = downloaded
2799 # Even if nothing new was downloaded, the merge itself only happens now
2800 info_dict['__real_download'] = True
2801 else:
2802 for file in downloaded:
2803 files_to_move[file] = None
4340deca
P
2804 else:
2805 # Just a single file
0202b52a 2806 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2807 if dl_filename is None or dl_filename == temp_filename:
2808 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2809 # So we should try to resume the download
e8e73840 2810 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2811 info_dict['__real_download'] = real_download
6c7274ec 2812 else:
2813 self.report_file_already_downloaded(dl_filename)
0202b52a 2814
0202b52a 2815 dl_filename = dl_filename or temp_filename
c571435f 2816 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2817
3158150c 2818 except network_exceptions as err:
7960b056 2819 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2820 return
2821 except (OSError, IOError) as err:
2822 raise UnavailableVideoError(err)
2823 except (ContentTooShortError, ) as err:
2824 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2825 return
8222d8de 2826
de6000d9 2827 if success and full_filename != '-':
f17f8651 2828
fd7cfb64 2829 def fixup():
2830 do_fixup = True
2831 fixup_policy = self.params.get('fixup')
2832 vid = info_dict['id']
2833
2834 if fixup_policy in ('ignore', 'never'):
2835 return
2836 elif fixup_policy == 'warn':
2837 do_fixup = False
f89b3e2d 2838 elif fixup_policy != 'force':
2839 assert fixup_policy in ('detect_or_warn', None)
2840 if not info_dict.get('__real_download'):
2841 do_fixup = False
fd7cfb64 2842
2843 def ffmpeg_fixup(cndn, msg, cls):
2844 if not cndn:
2845 return
2846 if not do_fixup:
2847 self.report_warning(f'{vid}: {msg}')
2848 return
2849 pp = cls(self)
2850 if pp.available:
2851 info_dict['__postprocessors'].append(pp)
2852 else:
2853 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2854
2855 stretched_ratio = info_dict.get('stretched_ratio')
2856 ffmpeg_fixup(
2857 stretched_ratio not in (1, None),
2858 f'Non-uniform pixel ratio {stretched_ratio}',
2859 FFmpegFixupStretchedPP)
2860
2861 ffmpeg_fixup(
2862 (info_dict.get('requested_formats') is None
2863 and info_dict.get('container') == 'm4a_dash'
2864 and info_dict.get('ext') == 'm4a'),
2865 'writing DASH m4a. Only some players support this container',
2866 FFmpegFixupM4aPP)
2867
2868 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2869 if 'protocol' in info_dict else None)
2870 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2871 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2872 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2873
2874 fixup()
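# Editorial note (not in the original source): the 'fixup' option accepts
# 'never'/'ignore', 'warn', 'detect_or_warn' (the default path above) and 'force';
# any fixup that needs remuxing is appended to info_dict['__postprocessors'] so it
# runs together with the postprocessors invoked just below.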
8222d8de 2875 try:
23c1a667 2876 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2877 except PostProcessingError as err:
2878 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2879 return
ab8e5e51
AM
2880 try:
2881 for ph in self._post_hooks:
23c1a667 2882 ph(info_dict['filepath'])
ab8e5e51
AM
2883 except Exception as err:
2884 self.report_error('post hooks: %s' % str(err))
2885 return
2d30509f 2886 must_record_download_archive = True
2887
2888 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2889 self.record_download_archive(info_dict)
c3e6ffba 2890 max_downloads = self.params.get('max_downloads')
2891 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2892 raise MaxDownloadsReached()
8222d8de
JMF
2893
2894 def download(self, url_list):
2895 """Download a given list of URLs."""
de6000d9 2896 outtmpl = self.outtmpl_dict['default']
3089bc74
S
2897 if (len(url_list) > 1
2898 and outtmpl != '-'
2899 and '%' not in outtmpl
2900 and self.params.get('max_downloads') != 1):
acd69589 2901 raise SameFileError(outtmpl)
8222d8de
JMF
2902
2903 for url in url_list:
2904 try:
5f6a1245 2905 # It also downloads the videos
61aa5ba3
S
2906 res = self.extract_info(
2907 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2908 except UnavailableVideoError:
6febd1c1 2909 self.report_error('unable to download video')
8222d8de 2910 except MaxDownloadsReached:
8f18aca8 2911 self.to_screen('[info] Maximum number of downloads reached')
8b0d7497 2912 raise
2913 except ExistingVideoReached:
8f18aca8 2914 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2915 raise
2916 except RejectedVideoReached:
8f18aca8 2917 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
8222d8de 2918 raise
63e0be34
PH
2919 else:
2920 if self.params.get('dump_single_json', False):
277d6ff5 2921 self.post_extract(res)
6e84b215 2922 self.to_stdout(json.dumps(self.sanitize_info(res)))
8222d8de
JMF
2923
2924 return self._download_retcode
2925
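Editorial aside: download() above is the same entry point the CLI uses. A minimal programmatic sketch (placeholder URL; 'outtmpl' is just an illustrative option):

from yt_dlp import YoutubeDL

with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
    retcode = ydl.download(['https://example.com/some-video'])  # placeholder URL
print('retcode:', retcode)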
1dcc4c0c 2926 def download_with_info_file(self, info_filename):
31bd3925
JMF
2927 with contextlib.closing(fileinput.FileInput(
2928 [info_filename], mode='r',
2929 openhook=fileinput.hook_encoded('utf-8'))) as f:
2930 # FileInput doesn't have a read method, so we can't call json.load
8012d892 2931 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898
JMF
2932 try:
2933 self.process_ie_result(info, download=True)
d3f62c19 2934 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898
JMF
2935 webpage_url = info.get('webpage_url')
2936 if webpage_url is not None:
6febd1c1 2937 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
2938 return self.download([webpage_url])
2939 else:
2940 raise
2941 return self._download_retcode
1dcc4c0c 2942
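Editorial aside: download_with_info_file() mirrors the --load-info-json CLI flag. A sketch of feeding a previously written .info.json back in (file name purely illustrative):

from yt_dlp import YoutubeDL

with YoutubeDL() as ydl:
    ydl.download_with_info_file('video.info.json')  # written earlier via 'writeinfojson'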
cb202fd2 2943 @staticmethod
8012d892 2944 def sanitize_info(info_dict, remove_private_keys=False):
2945 ''' Sanitize the infodict for converting to json '''
3ad56b42 2946 if info_dict is None:
2947 return info_dict
6e84b215 2948 info_dict.setdefault('epoch', int(time.time()))
2949 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 2950 keep_keys = ['_type']  # Always keep this to facilitate load-info-json
8012d892 2951 if remove_private_keys:
6e84b215 2952 remove_keys |= {
2953 'requested_formats', 'requested_subtitles', 'requested_entries',
2954 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2955 }
ae8f99e6 2956 empty_values = (None, {}, [], set(), tuple())
2957 reject = lambda k, v: k not in keep_keys and (
2958 k.startswith('_') or k in remove_keys or v in empty_values)
2959 else:
ae8f99e6 2960 reject = lambda k, v: k in remove_keys
5226731e 2961 filter_fn = lambda obj: (
b0249bca 2962 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2963 else obj if not isinstance(obj, dict)
ae8f99e6 2964 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2965 return filter_fn(info_dict)
cb202fd2 2966
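Editorial aside: sanitize_info() is what makes an extracted info dict JSON-serializable, optionally stripping the internal keys listed above. A minimal sketch of the common dump pattern (placeholder URL):

import json
from yt_dlp import YoutubeDL

with YoutubeDL() as ydl:
    info = ydl.extract_info('https://example.com/some-video', download=False)  # placeholder URL
    print(json.dumps(ydl.sanitize_info(info, remove_private_keys=True), indent=2))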
8012d892 2967 @staticmethod
2968 def filter_requested_info(info_dict, actually_filter=True):
2969 ''' Alias of sanitize_info for backward compatibility '''
2970 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2971
dcf64d43 2972 def run_pp(self, pp, infodict):
5bfa4862 2973 files_to_delete = []
dcf64d43 2974 if '__files_to_move' not in infodict:
2975 infodict['__files_to_move'] = {}
af819c21 2976 files_to_delete, infodict = pp.run(infodict)
5bfa4862 2977 if not files_to_delete:
dcf64d43 2978 return infodict
5bfa4862 2979
2980 if self.params.get('keepvideo', False):
2981 for f in files_to_delete:
dcf64d43 2982 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2983 else:
2984 for old_filename in set(files_to_delete):
2985 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2986 try:
2987 os.remove(encodeFilename(old_filename))
2988 except (IOError, OSError):
2989 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2990 if old_filename in infodict['__files_to_move']:
2991 del infodict['__files_to_move'][old_filename]
2992 return infodict
5bfa4862 2993
277d6ff5 2994 @staticmethod
2995 def post_extract(info_dict):
2996 def actual_post_extract(info_dict):
2997 if info_dict.get('_type') in ('playlist', 'multi_video'):
2998 for video_dict in info_dict.get('entries', {}):
b050d210 2999 actual_post_extract(video_dict or {})
277d6ff5 3000 return
3001
07cce701 3002 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 3003 extra = post_extractor().items()
3004 info_dict.update(extra)
07cce701 3005 info_dict.pop('__post_extractor', None)
277d6ff5 3006
4ec82a72 3007 original_infodict = info_dict.get('__original_infodict') or {}
3008 original_infodict.update(extra)
3009 original_infodict.pop('__post_extractor', None)
3010
b050d210 3011 actual_post_extract(info_dict or {})
277d6ff5 3012
56d868db 3013 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3014 info = dict(ie_info)
56d868db 3015 info['__files_to_move'] = files_to_move or {}
3016 for pp in self._pps[key]:
dcf64d43 3017 info = self.run_pp(pp, info)
56d868db 3018 return info, info.pop('__files_to_move', None)
5bfa4862 3019
dcf64d43 3020 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de
JMF
3021 """Run all the postprocessors on the given file."""
3022 info = dict(ie_info)
3023 info['filepath'] = filename
dcf64d43 3024 info['__files_to_move'] = files_to_move or {}
0202b52a 3025
56d868db 3026 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3027 info = self.run_pp(pp, info)
3028 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3029 del info['__files_to_move']
56d868db 3030 for pp in self._pps['after_move']:
dcf64d43 3031 info = self.run_pp(pp, info)
23c1a667 3032 return info
c1c9a79c 3033
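Editorial aside: pre_process()/post_process() above run the registered chains in the order 'pre_process', 'before_dl', 'post_process' (plus MoveFilesAfterDownloadPP), then 'after_move'. A sketch of hooking into that chain, assuming the add_post_processor() helper defined earlier in this class (not shown in this excerpt):

from yt_dlp import YoutubeDL
from yt_dlp.postprocessor.common import PostProcessor

class PrintPathPP(PostProcessor):
    def run(self, info):
        self.to_screen('Final file: %s' % info.get('filepath'))
        return [], info  # (files_to_delete, info), matching run_pp() above

with YoutubeDL() as ydl:
    ydl.add_post_processor(PrintPathPP(ydl), when='after_move')
    ydl.download(['https://example.com/some-video'])  # placeholder URL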
5db07df6 3034 def _make_archive_id(self, info_dict):
e9fef7ee
S
3035 video_id = info_dict.get('id')
3036 if not video_id:
3037 return
5db07df6
PH
3038 # Future-proof against any change in case
3039 # and for backwards compatibility with prior versions
e9fef7ee 3040 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3041 if extractor is None:
1211bb6d
S
3042 url = str_or_none(info_dict.get('url'))
3043 if not url:
3044 return
e9fef7ee 3045 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3046 for ie_key, ie in self._ies.items():
1211bb6d 3047 if ie.suitable(url):
8b7491c8 3048 extractor = ie_key
e9fef7ee
S
3049 break
3050 else:
3051 return
d0757229 3052 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3053
3054 def in_download_archive(self, info_dict):
3055 fn = self.params.get('download_archive')
3056 if fn is None:
3057 return False
3058
3059 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3060 if not vid_id:
7012b23c 3061 return False # Incomplete video information
5db07df6 3062
a45e8619 3063 return vid_id in self.archive
c1c9a79c
PH
3064
3065 def record_download_archive(self, info_dict):
3066 fn = self.params.get('download_archive')
3067 if fn is None:
3068 return
5db07df6
PH
3069 vid_id = self._make_archive_id(info_dict)
3070 assert vid_id
c1c9a79c 3071 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3072 archive_file.write(vid_id + '\n')
a45e8619 3073 self.archive.add(vid_id)
dd82ffea 3074
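Editorial aside: the archive file written above holds one '<extractor> <id>' line per finished download (see _make_archive_id). A sketch of enabling it, with an illustrative path:

from yt_dlp import YoutubeDL

# Each completed video appends a line such as 'youtube dQw4w9WgXcQ' to the file,
# and ids already listed there are skipped on later runs.
with YoutubeDL({'download_archive': 'archive.txt'}) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL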
8c51aa65 3075 @staticmethod
8abeeb94 3076 def format_resolution(format, default='unknown'):
fb04e403 3077 if format.get('vcodec') == 'none':
8326b00a 3078 if format.get('acodec') == 'none':
3079 return 'images'
fb04e403 3080 return 'audio only'
f49d89ee
PH
3081 if format.get('resolution') is not None:
3082 return format['resolution']
35615307
DA
3083 if format.get('width') and format.get('height'):
3084 res = '%dx%d' % (format['width'], format['height'])
3085 elif format.get('height'):
3086 res = '%sp' % format['height']
3087 elif format.get('width'):
388ae76b 3088 res = '%dx?' % format['width']
8c51aa65 3089 else:
8abeeb94 3090 res = default
8c51aa65
JMF
3091 return res
3092
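Editorial aside: a quick illustration of what format_resolution() returns for a few representative format dicts (the example values are chosen here, not taken from the source):

from yt_dlp import YoutubeDL

print(YoutubeDL.format_resolution({'width': 1920, 'height': 1080}))       # 1920x1080
print(YoutubeDL.format_resolution({'height': 720}))                       # 720p
print(YoutubeDL.format_resolution({'vcodec': 'none', 'acodec': 'mp4a'}))  # audio only
print(YoutubeDL.format_resolution({'vcodec': 'none', 'acodec': 'none'}))  # images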
c57f7757
PH
3093 def _format_note(self, fdict):
3094 res = ''
3095 if fdict.get('ext') in ['f4f', 'f4m']:
3096 res += '(unsupported) '
32f90364
PH
3097 if fdict.get('language'):
3098 if res:
3099 res += ' '
9016d76f 3100 res += '[%s] ' % fdict['language']
c57f7757
PH
3101 if fdict.get('format_note') is not None:
3102 res += fdict['format_note'] + ' '
3103 if fdict.get('tbr') is not None:
3104 res += '%4dk ' % fdict['tbr']
3105 if fdict.get('container') is not None:
3106 if res:
3107 res += ', '
3108 res += '%s container' % fdict['container']
3089bc74
S
3109 if (fdict.get('vcodec') is not None
3110 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3111 if res:
3112 res += ', '
3113 res += fdict['vcodec']
91c7271a 3114 if fdict.get('vbr') is not None:
c57f7757
PH
3115 res += '@'
3116 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3117 res += 'video@'
3118 if fdict.get('vbr') is not None:
3119 res += '%4dk' % fdict['vbr']
fbb21cf5 3120 if fdict.get('fps') is not None:
5d583bdf
S
3121 if res:
3122 res += ', '
3123 res += '%sfps' % fdict['fps']
c57f7757
PH
3124 if fdict.get('acodec') is not None:
3125 if res:
3126 res += ', '
3127 if fdict['acodec'] == 'none':
3128 res += 'video only'
3129 else:
3130 res += '%-5s' % fdict['acodec']
3131 elif fdict.get('abr') is not None:
3132 if res:
3133 res += ', '
3134 res += 'audio'
3135 if fdict.get('abr') is not None:
3136 res += '@%3dk' % fdict['abr']
3137 if fdict.get('asr') is not None:
3138 res += ' (%5dHz)' % fdict['asr']
3139 if fdict.get('filesize') is not None:
3140 if res:
3141 res += ', '
3142 res += format_bytes(fdict['filesize'])
9732d77e
PH
3143 elif fdict.get('filesize_approx') is not None:
3144 if res:
3145 res += ', '
3146 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3147 return res
91c7271a 3148
c57f7757 3149 def list_formats(self, info_dict):
94badb25 3150 formats = info_dict.get('formats', [info_dict])
53ed7066 3151 new_format = (
3152 'list-formats' not in self.params.get('compat_opts', [])
169dbde9 3153 and self.params.get('listformats_table', True) is not False)
76d321f6 3154 if new_format:
3155 table = [
3156 [
3157 format_field(f, 'format_id'),
3158 format_field(f, 'ext'),
3159 self.format_resolution(f),
3160 format_field(f, 'fps', '%d'),
3161 '|',
3162 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3163 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3164 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
76d321f6 3165 '|',
3166 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3167 format_field(f, 'vbr', '%4dk'),
3168 format_field(f, 'acodec', default='unknown').replace('none', ''),
3169 format_field(f, 'abr', '%3dk'),
3170 format_field(f, 'asr', '%5dHz'),
3f698246 3171 ', '.join(filter(None, (
3172 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3173 format_field(f, 'language', '[%s]'),
3174 format_field(f, 'format_note'),
3175 format_field(f, 'container', ignore=(None, f.get('ext'))),
ea05b302 3176 ))),
3f698246 3177 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
76d321f6 3178 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3f698246 3179 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
76d321f6 3180 else:
3181 table = [
3182 [
3183 format_field(f, 'format_id'),
3184 format_field(f, 'ext'),
3185 self.format_resolution(f),
3186 self._format_note(f)]
3187 for f in formats
3188 if f.get('preference') is None or f['preference'] >= -1000]
3189 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3190
cfb56d1a 3191 self.to_screen(
169dbde9 3192 '[info] Available formats for %s:' % info_dict['id'])
3193 self.to_stdout(render_table(
bc97cdae 3194 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a
PH
3195
3196 def list_thumbnails(self, info_dict):
b0249bca 3197 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3198 if not thumbnails:
b7b72db9 3199 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3200 return
cfb56d1a
PH
3201
3202 self.to_screen(
3203 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3204 self.to_stdout(render_table(
cfb56d1a
PH
3205 ['ID', 'width', 'height', 'URL'],
3206 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3207
360e1ca5 3208 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3209 if not subtitles:
360e1ca5 3210 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3211 return
a504ced0 3212 self.to_screen(
edab9dbf 3213 'Available %s for %s:' % (name, video_id))
2412044c 3214
3215 def _row(lang, formats):
49c258e1 3216 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3217 if len(set(names)) == 1:
7aee40c1 3218 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3219 return [lang, ', '.join(names), ', '.join(exts)]
3220
169dbde9 3221 self.to_stdout(render_table(
2412044c 3222 ['Language', 'Name', 'Formats'],
3223 [_row(lang, formats) for lang, formats in subtitles.items()],
3224 hideEmpty=True))
a504ced0 3225
dca08720
PH
3226 def urlopen(self, req):
3227 """ Start an HTTP download """
82d8a8b6 3228 if isinstance(req, compat_basestring):
67dda517 3229 req = sanitized_Request(req)
19a41fc6 3230 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3231
3232 def print_debug_header(self):
3233 if not self.params.get('verbose'):
3234 return
62fec3b2 3235
c6afed48
PH
3236 stdout_encoding = getattr(
3237 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3238 encoding_str = (
734f90bb
PH
3239 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3240 locale.getpreferredencoding(),
3241 sys.getfilesystemencoding(),
c6afed48 3242 stdout_encoding,
b0472057 3243 self.get_encoding()))
4192b51c 3244 write_string(encoding_str, encoding=None)
734f90bb 3245
e5813e53 3246 source = (
3247 '(exe)' if hasattr(sys, 'frozen')
3248 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3249 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3250 else '')
3251 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
e0986e31 3252 if _LAZY_LOADER:
f74980cb 3253 self._write_string('[debug] Lazy loading extractors enabled\n')
3254 if _PLUGIN_CLASSES:
3255 self._write_string(
3256 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
53ed7066 3257 if self.params.get('compat_opts'):
3258 self._write_string(
3259 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720
PH
3260 try:
3261 sp = subprocess.Popen(
3262 ['git', 'rev-parse', '--short', 'HEAD'],
3263 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3264 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3265 out, err = process_communicate_or_kill(sp)
dca08720
PH
3266 out = out.decode().strip()
3267 if re.match('[0-9a-f]+', out):
f74980cb 3268 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3269 except Exception:
dca08720
PH
3270 try:
3271 sys.exc_clear()
70a1165b 3272 except Exception:
dca08720 3273 pass
b300cda4
S
3274
3275 def python_implementation():
3276 impl_name = platform.python_implementation()
3277 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3278 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3279 return impl_name
3280
e5813e53 3281 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3282 platform.python_version(),
3283 python_implementation(),
3284 platform.architecture()[0],
b300cda4 3285 platform_name()))
d28b5171 3286
73fac4e9 3287 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3288 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3289 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3290 exe_str = ', '.join(
2831b468 3291 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3292 ) or 'none'
d28b5171 3293 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720 3294
2831b468 3295 from .downloader.fragment import can_decrypt_frag
3296 from .downloader.websocket import has_websockets
3297 from .postprocessor.embedthumbnail import has_mutagen
3298 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3299
ad3dc496 3300 lib_str = ', '.join(sorted(filter(None, (
2831b468 3301 can_decrypt_frag and 'pycryptodome',
3302 has_websockets and 'websockets',
3303 has_mutagen and 'mutagen',
3304 SQLITE_AVAILABLE and 'sqlite',
3305 KEYRING_AVAILABLE and 'keyring',
ad3dc496 3306 )))) or 'none'
2831b468 3307 self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3308
dca08720
PH
3309 proxy_map = {}
3310 for handler in self._opener.handlers:
3311 if hasattr(handler, 'proxies'):
3312 proxy_map.update(handler.proxies)
734f90bb 3313 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3314
58b1f00d
PH
3315 if self.params.get('call_home', False):
3316 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3317 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3318 return
58b1f00d
PH
3319 latest_version = self.urlopen(
3320 'https://yt-dl.org/latest/version').read().decode('utf-8')
3321 if version_tuple(latest_version) > version_tuple(__version__):
3322 self.report_warning(
3323 'You are using an outdated version (newest version: %s)! '
3324 'See https://yt-dl.org/update if you need help updating.' %
3325 latest_version)
3326
e344693b 3327 def _setup_opener(self):
6ad14cab 3328 timeout_val = self.params.get('socket_timeout')
19a41fc6 3329 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3330
982ee69a 3331 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3332 opts_cookiefile = self.params.get('cookiefile')
3333 opts_proxy = self.params.get('proxy')
3334
982ee69a 3335 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3336
6a3f4c3f 3337 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3338 if opts_proxy is not None:
3339 if opts_proxy == '':
3340 proxies = {}
3341 else:
3342 proxies = {'http': opts_proxy, 'https': opts_proxy}
3343 else:
3344 proxies = compat_urllib_request.getproxies()
067aa17e 3345 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3346 if 'http' in proxies and 'https' not in proxies:
3347 proxies['https'] = proxies['http']
91410c9b 3348 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3349
3350 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3351 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3352 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3353 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3354 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3355
3356 # When passing our own FileHandler instance, build_opener won't add the
3357 # default FileHandler, which lets us disable the file protocol since it
3358 # can be used for malicious purposes (see
067aa17e 3359 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3360 file_handler = compat_urllib_request.FileHandler()
3361
3362 def file_open(*args, **kwargs):
7a5c1cfe 3363 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3364 file_handler.file_open = file_open
3365
3366 opener = compat_urllib_request.build_opener(
fca6dba8 3367 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3368
dca08720
PH
3369 # Delete the default user-agent header, which would otherwise apply in
3370 # cases where our custom HTTP handler doesn't come into play
067aa17e 3371 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3372 opener.addheaders = []
3373 self._opener = opener
62fec3b2
PH
3374
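Editorial aside: the opener built above is where the 'proxy', 'cookiefile'/'cookiesfrombrowser' and 'socket_timeout' options take effect. A minimal sketch (all values illustrative):

from yt_dlp import YoutubeDL

opts = {
    'proxy': 'http://127.0.0.1:3128',  # routed through PerRequestProxyHandler above
    'cookiefile': 'cookies.txt',       # loaded via load_cookies() above
    'socket_timeout': 15,
}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL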
3375 def encode(self, s):
3376 if isinstance(s, bytes):
3377 return s # Already encoded
3378
3379 try:
3380 return s.encode(self.get_encoding())
3381 except UnicodeEncodeError as err:
3382 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3383 raise
3384
3385 def get_encoding(self):
3386 encoding = self.params.get('encoding')
3387 if encoding is None:
3388 encoding = preferredencoding()
3389 return encoding
ec82d85a 3390
de6000d9 3391 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3392 write_all = self.params.get('write_all_thumbnails', False)
3393 thumbnails = []
3394 if write_all or self.params.get('writethumbnail', False):
0202b52a 3395 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3396 multiple = write_all and len(thumbnails) > 1
ec82d85a 3397
0202b52a 3398 ret = []
981052c9 3399 for t in thumbnails[::-1]:
ec82d85a 3400 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3401 suffix = '%s.' % t['id'] if multiple else ''
3402 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3403 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3404
0c3d0f51 3405 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3406 ret.append(suffix + thumb_ext)
8ba87148 3407 t['filepath'] = thumb_filename
ec82d85a
PH
3408 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3409 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3410 else:
5ef7d9bd 3411 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3412 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3413 try:
3414 uf = self.urlopen(t['url'])
d3d89c32 3415 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3416 shutil.copyfileobj(uf, thumbf)
de6000d9 3417 ret.append(suffix + thumb_ext)
ec82d85a
PH
3418 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3419 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3420 t['filepath'] = thumb_filename
3158150c 3421 except network_exceptions as err:
ec82d85a 3422 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3423 (t['url'], error_to_compat_str(err)))
6c4fd172 3424 if ret and not write_all:
3425 break
0202b52a 3426 return ret
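Editorial aside: _write_thumbnails() above is driven by two option keys. A final sketch of fetching thumbnails without the media itself (placeholder URL):

from yt_dlp import YoutubeDL

opts = {
    'skip_download': True,
    'writethumbnail': True,          # best thumbnail only
    # 'write_all_thumbnails': True,  # or every entry in info_dict['thumbnails']
}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL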