#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random

from string import ascii_letters
from zipimport import zipimporter

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    LazyList,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    process_communicate_or_kill,
    register_socks_protocols,
    RejectedVideoReached,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    ThrottledDownload,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video file
    and writing it to disk if the user has requested it, among some other
    tasks. In most cases there should be one per program. Since the
    downloader, given a video URL, doesn't know how to extract all the
    needed information (a task that InfoExtractors do), it has to pass the
    URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    videopassword: Password for accessing a video.
    ap_mso: Adobe Pass multiple-system operator identifier.
    ap_username: Multiple-system operator account username.
    ap_password: Multiple-system operator account password.
    usenetrc: Use netrc for authentication instead.
    verbose: Print additional info to stdout.
    quiet: Do not print messages to stdout.
    no_warnings: Do not print out anything for warnings.
    forceprint: A list of templates to force print
    forceurl: Force printing final URL. (Deprecated)
    forcetitle: Force printing title. (Deprecated)
    forceid: Force printing ID. (Deprecated)
    forcethumbnail: Force printing thumbnail URL. (Deprecated)
    forcedescription: Force printing description. (Deprecated)
    forcefilename: Force printing final filename. (Deprecated)
    forceduration: Force printing duration. (Deprecated)
    forcejson: Force printing info_dict as JSON.
    dump_single_json: Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate: Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format: Video format code. See "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort: How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats: Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none)
                       or None (check only if requested by extractor)
    paths: Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl: Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name: Limit length of filename (extension excluded)
    windowsfilenames: Force the filenames to be Windows compatible
    ignoreerrors: Do not stop on download errors
                       (Default True when running yt-dlp,
                       but False when directly accessing YoutubeDL class)
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites: Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False.
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart: Playlist item to start at.
    playlistend: Playlist item to end at.
    playlist_items: Specific indices of playlist to download.
    playlistreverse: Download playlist items in reverse order.
    playlistrandom: Download playlist items in random order.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logger: Log messages to a logging.Logger instance.
    logtostderr: Log messages to stderr instead of stdout.
    writedescription: Write the video description to a .description file
    writeinfojson: Write the video metadata to a .info.json file
    clean_infojson: Remove private fields from the infojson
    getcomments: Extract video comments. These will not be written to disk
                       unless writeinfojson is also given
    writeannotations: Write the video annotations to a .annotations.xml file
    writethumbnail: Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink: Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink: Write a Windows internet shortcut file (.url)
    writewebloclink: Write a macOS internet shortcut file (.webloc)
    writedesktoplink: Write a Linux internet shortcut file (.desktop)
    writesubtitles: Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles: Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles: Lists all available subtitles for the video
    subtitlesformat: The format code for subtitles
    subtitleslangs: List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo: Keep the video file after post-processing
    daterange: A DateRange object, download only if the upload_date is in the range.
    skip_download: Skip the actual download of the video file
    cachedir: Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist: Download single video instead of a playlist if in doubt.
    age_limit: An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views: An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views: An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive: File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject: Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile: File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy: URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout: Time to wait for unresponsive hosts, in seconds
    bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads: Download ads as well
    default_search: Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding: Use this encoding instead of the system-specified.
    extract_flat: Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors: A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                         pre_process|before_dl|post_process|after_move.
                         Assumed to be 'post_process' if not given
    post_hooks: A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks: A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext: Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup: Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address: Client-side IP address to bind to.
    call_home: Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval: Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats: Print an overview of available video formats and exit.
    list_thumbnails: Print a table of all thumbnails and exit.
    match_filter: A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color: Do not emit color codes in output.
    geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts: Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                       Refer to __init__.py for their implementation

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args: A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

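    # A minimal embedding sketch (illustrative; the URL is a placeholder and the
    # chosen option values are assumptions). The keys are options documented above.
    #
    #     import yt_dlp
    #
    #     ydl_opts = {
    #         'format': 'bestvideo+bestaudio/best',
    #         'outtmpl': '%(title)s [%(id)s].%(ext)s',
    #         'ignoreerrors': True,
    #     }
    #     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    #         ydl.download(['https://example.com/watch?v=xxxx'])
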
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                'You have asked for unplayable formats to be listed/downloaded. '
                'This is a developer option intended for debugging. '
                'If you experience any issues while using this option, DO NOT open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        if self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        elif self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        # Preload the archive, if any is specified
        def preload_download_archive(fn):
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list. If there is no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

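    # Illustrative sketch (hypothetical values): entries of the 'postprocessors'
    # option documented in the class docstring are plain dicts; 'key' names the
    # PP class and 'when' picks the stage. __init__ resolves each dict via
    # get_postprocessor() and registers the instance here.
    #
    #     YoutubeDL({'postprocessors': [
    #         {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'},
    #         {'key': 'FFmpegMetadata', 'when': 'post_process'},
    #     ]})
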
    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

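    # Illustrative sketch: a progress hook is any callable taking the status
    # dict described in the class docstring ('status', 'filename', ...).
    #
    #     def my_hook(d):
    #         if d['status'] == 'finished':
    #             print('Done downloading %s' % d['filename'])
    #
    #     ydl.add_progress_hook(my_hook)  # or params={'progress_hooks': [my_hook]}
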
    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message, only_once)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def write_debug(self, message, only_once=False):
        '''Log the debug message to the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        outtmpl_dict.update({
            k: v for k, v in DEFAULT_OUTTMPL.items()
            if not outtmpl_dict.get(k)})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

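    # Illustrative sketch (keys other than 'default' are examples): 'outtmpl'
    # may be a single template string or a dict keyed by OUTTMPL_TYPES;
    # parse_outtmpl() above fills any missing keys from DEFAULT_OUTTMPL.
    #
    #     YoutubeDL({'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s',
    #                            'infojson': '%(id)s.%(ext)s'}})
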
    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

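    # Illustrative sketch (hypothetical paths): with
    # params['paths'] = {'home': '~/Videos', 'temp': 'tmp'}, a call such as
    # get_output_path('temp', 'clip.mp4') joins home + temp + filename, giving
    # roughly '~/Videos/tmp/clip.mp4' after expand_path() and sanitize_path().
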
    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly. That is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

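    # Illustrative sketch: validate_outtmpl() lets callers pre-check a
    # user-supplied template before extraction.
    #
    #     err = YoutubeDL.validate_outtmpl('%(title)s - %(uploader)s.%(ext)s')
    #     if err:
    #         raise SystemExit('Invalid output template: %s' % err)
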
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':
                value, fmt = ', '.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'c':
                value = str(value)
                if value is None:
                    value, fmt = default, 's'
                else:
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

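    # Illustrative sketch of the field syntax handled above (example templates,
    # not an exhaustive spec): dotted traversal, +/- arithmetic, '>' strftime
    # formatting, '|' default, and the extra conversions l/j/q.
    #
    #     '%(title)s - %(formats.0.height)sp.%(ext)s'
    #     '%(epoch-3600>%H-%M-%S)s'      # maths, then datetime formatting
    #     '%(uploader|Unknown)s'         # fallback when the field is missing
    #     '%(tags)l'  '%(formats)j'  '%(title)q'
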
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
            outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
            filename = outtmpl % template_dict

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

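    # Illustrative sketch: prepare_filename() expands the chosen output template
    # for an already-extracted info dict and routes the result through
    # get_output_path(); e.g.
    #
    #     fname = ydl.prepare_filename(info)  # uses outtmpl['default']
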
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

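    # Illustrative sketch (hypothetical filter): 'match_filter' (see the class
    # docstring) is any callable returning None to accept a video or a message
    # string to skip it; _match_entry() above calls it with incomplete=...
    #
    #     def longer_than_a_minute(info_dict, incomplete=False):
    #         if (info_dict.get('duration') or 0) < 60:
    #             return 'Skipping short video %s' % info_dict.get('id')
    #
    #     YoutubeDL({'match_filter': longer_than_a_minute})
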
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

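    # Illustrative sketch (placeholder URL): extract_info() is the usual
    # metadata-only entry point when embedding.
    #
    #     with YoutubeDL({'quiet': True}) as ydl:
    #         info = ydl.extract_info('https://example.com/watch?v=xxxx', download=False)
    #         print(info.get('title'))
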
cc9d1493 1211 def __handle_extraction_exceptions(func, handle_all_errors=True):
a0566bbf 1212 def wrapper(self, *args, **kwargs):
1213 try:
1214 return func(self, *args, **kwargs)
773f291d
S
1215 except GeoRestrictedError as e:
1216 msg = e.msg
1217 if e.countries:
1218 msg += '\nThis video is available in %s.' % ', '.join(
1219 map(ISO3166Utils.short2full, e.countries))
1220 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1221 self.report_error(msg)
fb043a6e 1222 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1223 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1224 except ThrottledDownload:
1225 self.to_stderr('\r')
1226 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1227 return wrapper(self, *args, **kwargs)
8b0d7497 1228 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
d3e5bbf4 1229 raise
8222d8de 1230 except Exception as e:
cc9d1493 1231 if handle_all_errors and self.params.get('ignoreerrors', False):
9b9c5355 1232 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1233 else:
1234 raise
a0566bbf 1235 return wrapper
1236
1237 @__handle_extraction_exceptions
58f197b7 1238 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1239 ie_result = ie.extract(url)
1240 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1241 return
1242 if isinstance(ie_result, list):
1243 # Backwards compatibility: old IE result format
1244 ie_result = {
1245 '_type': 'compat_list',
1246 'entries': ie_result,
1247 }
e37d0efb 1248 if extra_info.get('original_url'):
1249 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1250 self.add_default_extra_info(ie_result, ie, url)
1251 if process:
1252 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1253 else:
a0566bbf 1254 return ie_result
fe7e0c98 1255
ea38e55f 1256 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1257 if url is not None:
1258 self.add_extra_info(ie_result, {
1259 'webpage_url': url,
1260 'original_url': url,
1261 'webpage_url_basename': url_basename(url),
1262 })
1263 if ie is not None:
1264 self.add_extra_info(ie_result, {
1265 'extractor': ie.IE_NAME,
1266 'extractor_key': ie.ie_key(),
1267 })
ea38e55f 1268
58adec46 1269 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1270 """
1271 Take the result of the ie(may be modified) and resolve all unresolved
1272 references (URLs, playlist items).
1273
1274 It will also download the videos if 'download'.
1275 Returns the resolved ie_result.
1276 """
58adec46 1277 if extra_info is None:
1278 extra_info = {}
e8ee972c
PH
1279 result_type = ie_result.get('_type', 'video')
1280
057a5206 1281 if result_type in ('url', 'url_transparent'):
134c6ea8 1282 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1283 if ie_result.get('original_url'):
1284 extra_info.setdefault('original_url', ie_result['original_url'])
1285
057a5206 1286 extract_flat = self.params.get('extract_flat', False)
3089bc74 S
1287 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1288 or extract_flat is True):
ecb54191 1289 info_copy = ie_result.copy()
1290 self.add_extra_info(info_copy, extra_info)
6033d980 1291 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1292 self.add_default_extra_info(info_copy, ie, ie_result['url'])
ecb54191 1293 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
e8ee972c PH
1294 return ie_result
1295
8222d8de 1296 if result_type == 'video':
b6c45014 1297 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1298 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1299 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1300 if additional_urls:
e9f4ccd1 1301 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1302 if isinstance(additional_urls, compat_str):
1303 additional_urls = [additional_urls]
1304 self.to_screen(
1305 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1306 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1307 ie_result['additional_entries'] = [
1308 self.extract_info(
1309 url, download, extra_info,
1310 force_generic_extractor=self.params.get('force_generic_extractor'))
1311 for url in additional_urls
1312 ]
1313 return ie_result
8222d8de JMF
1314 elif result_type == 'url':
1315 # We have to add extra_info to the results because it may be
1316 # contained in a playlist
07cce701 1317 return self.extract_info(
1318 ie_result['url'], download,
1319 ie_key=ie_result.get('ie_key'),
1320 extra_info=extra_info)
7fc3fa05 PH
1321 elif result_type == 'url_transparent':
1322 # Use the information from the embedding page
1323 info = self.extract_info(
1324 ie_result['url'], ie_key=ie_result.get('ie_key'),
1325 extra_info=extra_info, download=False, process=False)
1326
1640eb09 S
1327 # extract_info may return None when ignoreerrors is enabled and
1328 # extraction failed with an error, don't crash and return early
1329 # in this case
1330 if not info:
1331 return info
1332
412c617d PH
1333 force_properties = dict(
1334 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1335 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d PH
1336 if f in force_properties:
1337 del force_properties[f]
1338 new_result = info.copy()
1339 new_result.update(force_properties)
7fc3fa05 1340
0563f7ac S
1341 # Extracted info may not be a video result (i.e.
1342 # info.get('_type', 'video') != video) but rather an url or
1343 # url_transparent. In such cases outer metadata (from ie_result)
1344 # should be propagated to inner one (info). For this to happen
1345 # _type of info should be overridden with url_transparent. This
067aa17e 1346 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac S
1347 if new_result.get('_type') == 'url':
1348 new_result['_type'] = 'url_transparent'
7fc3fa05 PH
1349
1350 return self.process_ie_result(
1351 new_result, download=download, extra_info=extra_info)
40fcba5e 1352 elif result_type in ('playlist', 'multi_video'):
30a074c2 1353 # Protect from infinite recursion due to recursively nested playlists
1354 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1355 webpage_url = ie_result['webpage_url']
1356 if webpage_url in self._playlist_urls:
7e85e872 1357 self.to_screen(
30a074c2 1358 '[download] Skipping already downloaded playlist: %s'
1359 % (ie_result.get('title') or ie_result.get('id')))
1360 return
7e85e872 1361
30a074c2 1362 self._playlist_level += 1
1363 self._playlist_urls.add(webpage_url)
bc516a3f 1364 self._sanitize_thumbnails(ie_result)
30a074c2 1365 try:
1366 return self.__process_playlist(ie_result, download)
1367 finally:
1368 self._playlist_level -= 1
1369 if not self._playlist_level:
1370 self._playlist_urls.clear()
8222d8de 1371 elif result_type == 'compat_list':
c9bf4114 PH
1372 self.report_warning(
1373 'Extractor %s returned a compat_list result. '
1374 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1375
8222d8de 1376 def _fixup(r):
b868936c 1377 self.add_extra_info(r, {
1378 'extractor': ie_result['extractor'],
1379 'webpage_url': ie_result['webpage_url'],
1380 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1381 'extractor_key': ie_result['extractor_key'],
1382 })
8222d8de JMF
1383 return r
1384 ie_result['entries'] = [
b6c45014 1385 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de JMF
1386 for r in ie_result['entries']
1387 ]
1388 return ie_result
1389 else:
1390 raise Exception('Invalid result type: %s' % result_type)
1391
e92caff5 1392 def _ensure_dir_exists(self, path):
1393 return make_dir(path, self.report_error)
1394
30a074c2 1395 def __process_playlist(self, ie_result, download):
1396 # We process each entry in the playlist
1397 playlist = ie_result.get('title') or ie_result.get('id')
1398 self.to_screen('[download] Downloading playlist: %s' % playlist)
1399
498f5606 1400 if 'entries' not in ie_result:
1401 raise EntryNotInPlaylist()
1402 incomplete_entries = bool(ie_result.get('requested_entries'))
1403 if incomplete_entries:
1404 def fill_missing_entries(entries, indexes):
1405 ret = [None] * max(indexes)
1406 for i, entry in zip(indexes, entries):
1407 ret[i - 1] = entry
1408 return ret
1409 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1410
30a074c2 1411 playlist_results = []
1412
56a8fb4f 1413 playliststart = self.params.get('playliststart', 1)
30a074c2 1414 playlistend = self.params.get('playlistend')
1415 # For backwards compatibility, interpret -1 as whole list
1416 if playlistend == -1:
1417 playlistend = None
1418
1419 playlistitems_str = self.params.get('playlist_items')
1420 playlistitems = None
1421 if playlistitems_str is not None:
1422 def iter_playlistitems(format):
1423 for string_segment in format.split(','):
1424 if '-' in string_segment:
1425 start, end = string_segment.split('-')
1426 for item in range(int(start), int(end) + 1):
1427 yield int(item)
1428 else:
1429 yield int(string_segment)
1430 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
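# e.g. --playlist-items '1-3,7' yields the ordered, de-duplicated index set [1, 2, 3, 7]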
1431
1432 ie_entries = ie_result['entries']
56a8fb4f 1433 msg = (
1434 'Downloading %d videos' if not isinstance(ie_entries, list)
1435 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1436 if not isinstance(ie_entries, (list, PagedList)):
1437 ie_entries = LazyList(ie_entries)
1438
50fed816 1439 def get_entry(i):
1440 return YoutubeDL.__handle_extraction_exceptions(
cc9d1493 1441 lambda self, i: ie_entries[i - 1],
1442 False
50fed816 1443 )(self, i)
1444
56a8fb4f 1445 entries = []
1446 for i in playlistitems or itertools.count(playliststart):
1447 if playlistitems is None and playlistend is not None and playlistend < i:
1448 break
1449 entry = None
1450 try:
50fed816 1451 entry = get_entry(i)
56a8fb4f 1452 if entry is None:
498f5606 1453 raise EntryNotInPlaylist()
56a8fb4f 1454 except (IndexError, EntryNotInPlaylist):
1455 if incomplete_entries:
1456 raise EntryNotInPlaylist()
1457 elif not playlistitems:
1458 break
1459 entries.append(entry)
120fe513 1460 try:
1461 if entry is not None:
1462 self._match_entry(entry, incomplete=True, silent=True)
1463 except (ExistingVideoReached, RejectedVideoReached):
1464 break
56a8fb4f 1465 ie_result['entries'] = entries
30a074c2 1466
56a8fb4f 1467 # Save playlist_index before re-ordering
1468 entries = [
9e598870 1469 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1470 for i, entry in enumerate(entries, 1)
1471 if entry is not None]
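# Example: with --playlist-start 3 and no --playlist-items, the first collected
# entry keeps playlist_index 3, the next 4, and so on.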
1472 n_entries = len(entries)
498f5606 1473
498f5606 1474 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1475 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1476 ie_result['requested_entries'] = playlistitems
1477
1478 if self.params.get('allow_playlist_files', True):
1479 ie_copy = {
1480 'playlist': playlist,
1481 'playlist_id': ie_result.get('id'),
1482 'playlist_title': ie_result.get('title'),
1483 'playlist_uploader': ie_result.get('uploader'),
1484 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1485 'playlist_index': 0,
498f5606 1486 }
1487 ie_copy.update(dict(ie_result))
1488
1489 if self.params.get('writeinfojson', False):
1490 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1491 if not self._ensure_dir_exists(encodeFilename(infofn)):
1492 return
1493 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1494 self.to_screen('[info] Playlist metadata is already present')
1495 else:
1496 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1497 try:
8012d892 1498 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
498f5606 1499 except (OSError, IOError):
1500 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1501
681de68e 1502 # TODO: This should be passed to ThumbnailsConvertor if necessary
1503 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1504
498f5606 1505 if self.params.get('writedescription', False):
1506 descfn = self.prepare_filename(ie_copy, 'pl_description')
1507 if not self._ensure_dir_exists(encodeFilename(descfn)):
1508 return
1509 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1510 self.to_screen('[info] Playlist description is already present')
1511 elif ie_result.get('description') is None:
1512 self.report_warning('There\'s no playlist description to write.')
1513 else:
1514 try:
1515 self.to_screen('[info] Writing playlist description to: ' + descfn)
1516 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1517 descfile.write(ie_result['description'])
1518 except (OSError, IOError):
1519 self.report_error('Cannot write playlist description file ' + descfn)
1520 return
30a074c2 1521
1522 if self.params.get('playlistreverse', False):
1523 entries = entries[::-1]
30a074c2 1524 if self.params.get('playlistrandom', False):
1525 random.shuffle(entries)
1526
1527 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1528
56a8fb4f 1529 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1530 failures = 0
1531 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1532 for i, entry_tuple in enumerate(entries, 1):
1533 playlist_index, entry = entry_tuple
81139999 1534 if 'playlist-index' in self.params.get('compat_opts', []):
1535 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1536 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1537 # This __x_forwarded_for_ip thing is a bit ugly but requires
1538 # minimal changes
1539 if x_forwarded_for:
1540 entry['__x_forwarded_for_ip'] = x_forwarded_for
1541 extra = {
1542 'n_entries': n_entries,
f59ae581 1543 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1544 'playlist_index': playlist_index,
1545 'playlist_autonumber': i,
30a074c2 1546 'playlist': playlist,
1547 'playlist_id': ie_result.get('id'),
1548 'playlist_title': ie_result.get('title'),
1549 'playlist_uploader': ie_result.get('uploader'),
1550 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1551 'extractor': ie_result['extractor'],
1552 'webpage_url': ie_result['webpage_url'],
1553 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1554 'extractor_key': ie_result['extractor_key'],
1555 }
1556
1557 if self._match_entry(entry, incomplete=True) is not None:
1558 continue
1559
1560 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1561 if not entry_result:
1562 failures += 1
1563 if failures >= max_failures:
1564 self.report_error(
1565 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1566 break
30a074c2 1567 # TODO: skip failed (empty) entries?
1568 playlist_results.append(entry_result)
1569 ie_result['entries'] = playlist_results
1570 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1571 return ie_result
1572
a0566bbf 1573 @__handle_extraction_exceptions
1574 def __process_iterable_entry(self, entry, download, extra_info):
1575 return self.process_ie_result(
1576 entry, download=download, extra_info=extra_info)
1577
67134eab JMF
1578 def _build_format_filter(self, filter_spec):
1579 " Returns a function to filter the formats according to the filter_spec "
083c9df9 PH
1580
1581 OPERATORS = {
1582 '<': operator.lt,
1583 '<=': operator.le,
1584 '>': operator.gt,
1585 '>=': operator.ge,
1586 '=': operator.eq,
1587 '!=': operator.ne,
1588 }
67134eab 1589 operator_rex = re.compile(r'''(?x)\s*
187986a8 1590 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1591 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1592 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1593 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
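# Illustrative numeric filter specs (examples, not from the source): 'height>=720',
# 'filesize<50M'; a '?' after the operator (e.g. 'fps>?30') also keeps formats
# where the field is unknown (see the none_inclusive handling in _filter below).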
187986a8 1594 m = operator_rex.fullmatch(filter_spec)
9ddb6925 S
1595 if m:
1596 try:
1597 comparison_value = int(m.group('value'))
1598 except ValueError:
1599 comparison_value = parse_filesize(m.group('value'))
1600 if comparison_value is None:
1601 comparison_value = parse_filesize(m.group('value') + 'B')
1602 if comparison_value is None:
1603 raise ValueError(
1604 'Invalid value %r in format specification %r' % (
67134eab 1605 m.group('value'), filter_spec))
9ddb6925 S
1606 op = OPERATORS[m.group('op')]
1607
083c9df9 1608 if not m:
9ddb6925 S
1609 STR_OPERATORS = {
1610 '=': operator.eq,
10d33b34 YCH
1611 '^=': lambda attr, value: attr.startswith(value),
1612 '$=': lambda attr, value: attr.endswith(value),
1613 '*=': lambda attr, value: value in attr,
9ddb6925 1614 }
187986a8 1615 str_operator_rex = re.compile(r'''(?x)\s*
1616 (?P<key>[a-zA-Z0-9._-]+)\s*
1617 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1618 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1619 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
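# Illustrative string filter specs: 'ext=mp4', 'protocol^=http',
# 'format_id!*=dash' (a '!' before the operator negates the comparison).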
187986a8 1620 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 S
1621 if m:
1622 comparison_value = m.group('value')
2cc779f4 S
1623 str_op = STR_OPERATORS[m.group('op')]
1624 if m.group('negation'):
e118a879 1625 op = lambda attr, value: not str_op(attr, value)
2cc779f4 S
1626 else:
1627 op = str_op
083c9df9 1628
9ddb6925 1629 if not m:
187986a8 1630 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9 PH
1631
1632 def _filter(f):
1633 actual_value = f.get(m.group('key'))
1634 if actual_value is None:
1635 return m.group('none_inclusive')
1636 return op(actual_value, comparison_value)
67134eab JMF
1637 return _filter
1638
0017d9ad 1639 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1640
af0f7428 S
1641 def can_merge():
1642 merger = FFmpegMergerPP(self)
1643 return merger.available and merger.can_merge()
1644
91ebc640 1645 prefer_best = (
b7b04c78 1646 not self.params.get('simulate')
91ebc640 1647 and download
1648 and (
1649 not can_merge()
19807826 1650 or info_dict.get('is_live', False)
de6000d9 1651 or self.outtmpl_dict['default'] == '-'))
53ed7066 1652 compat = (
1653 prefer_best
1654 or self.params.get('allow_multiple_audio_streams', False)
1655 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1656
1657 return (
53ed7066 1658 'best/bestvideo+bestaudio' if prefer_best
1659 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1660 else 'bestvideo+bestaudio/best')
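# In short: when merging is not possible (no usable ffmpeg, output to stdout, or a
# live stream), prefer a pre-merged 'best'; otherwise request separate
# bestvideo+bestaudio streams.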
0017d9ad 1661
67134eab JMF
1662 def build_format_selector(self, format_spec):
1663 def syntax_error(note, start):
1664 message = (
1665 'Invalid format specification: '
1666 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1667 return SyntaxError(message)
1668
1669 PICKFIRST = 'PICKFIRST'
1670 MERGE = 'MERGE'
1671 SINGLE = 'SINGLE'
0130afb7 1672 GROUP = 'GROUP'
67134eab JMF
1673 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1674
91ebc640 1675 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1676 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1677
e8e73840 1678 check_formats = self.params.get('check_formats')
1679
67134eab JMF
1680 def _parse_filter(tokens):
1681 filter_parts = []
1682 for type, string, start, _, _ in tokens:
1683 if type == tokenize.OP and string == ']':
1684 return ''.join(filter_parts)
1685 else:
1686 filter_parts.append(string)
1687
232541df 1688 def _remove_unused_ops(tokens):
17cc1534 1689 # Remove operators that we don't use and join them with the surrounding strings
232541df JMF
1690 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1691 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1692 last_string, last_start, last_end, last_line = None, None, None, None
1693 for type, string, start, end, line in tokens:
1694 if type == tokenize.OP and string == '[':
1695 if last_string:
1696 yield tokenize.NAME, last_string, last_start, last_end, last_line
1697 last_string = None
1698 yield type, string, start, end, line
1699 # everything inside brackets will be handled by _parse_filter
1700 for type, string, start, end, line in tokens:
1701 yield type, string, start, end, line
1702 if type == tokenize.OP and string == ']':
1703 break
1704 elif type == tokenize.OP and string in ALLOWED_OPS:
1705 if last_string:
1706 yield tokenize.NAME, last_string, last_start, last_end, last_line
1707 last_string = None
1708 yield type, string, start, end, line
1709 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1710 if not last_string:
1711 last_string = string
1712 last_start = start
1713 last_end = end
1714 else:
1715 last_string += string
1716 if last_string:
1717 yield tokenize.NAME, last_string, last_start, last_end, last_line
1718
cf2ac6df 1719 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab JMF
1720 selectors = []
1721 current_selector = None
1722 for type, string, start, _, _ in tokens:
1723 # ENCODING is only defined in python 3.x
1724 if type == getattr(tokenize, 'ENCODING', None):
1725 continue
1726 elif type in [tokenize.NAME, tokenize.NUMBER]:
1727 current_selector = FormatSelector(SINGLE, string, [])
1728 elif type == tokenize.OP:
cf2ac6df JMF
1729 if string == ')':
1730 if not inside_group:
1731 # ')' will be handled by the parentheses group
1732 tokens.restore_last_token()
67134eab 1733 break
cf2ac6df 1734 elif inside_merge and string in ['/', ',']:
0130afb7 JMF
1735 tokens.restore_last_token()
1736 break
cf2ac6df JMF
1737 elif inside_choice and string == ',':
1738 tokens.restore_last_token()
1739 break
1740 elif string == ',':
0a31a350 JMF
1741 if not current_selector:
1742 raise syntax_error('"," must follow a format selector', start)
67134eab JMF
1743 selectors.append(current_selector)
1744 current_selector = None
1745 elif string == '/':
d96d604e JMF
1746 if not current_selector:
1747 raise syntax_error('"/" must follow a format selector', start)
67134eab 1748 first_choice = current_selector
cf2ac6df 1749 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1750 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab JMF
1751 elif string == '[':
1752 if not current_selector:
1753 current_selector = FormatSelector(SINGLE, 'best', [])
1754 format_filter = _parse_filter(tokens)
1755 current_selector.filters.append(format_filter)
0130afb7 JMF
1756 elif string == '(':
1757 if current_selector:
1758 raise syntax_error('Unexpected "("', start)
cf2ac6df JMF
1759 group = _parse_format_selection(tokens, inside_group=True)
1760 current_selector = FormatSelector(GROUP, group, [])
67134eab 1761 elif string == '+':
d03cfdce 1762 if not current_selector:
1763 raise syntax_error('Unexpected "+"', start)
1764 selector_1 = current_selector
1765 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1766 if not selector_2:
1767 raise syntax_error('Expected a selector', start)
1768 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab JMF
1769 else:
1770 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1771 elif type == tokenize.ENDMARKER:
1772 break
1773 if current_selector:
1774 selectors.append(current_selector)
1775 return selectors
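# Illustrative example: the spec 'bestvideo[height<=720]+bestaudio/best' parses to
# PICKFIRST(MERGE(SINGLE 'bestvideo' [height<=720], SINGLE 'bestaudio'), SINGLE 'best').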
1776
f8d4ad9a 1777 def _merge(formats_pair):
1778 format_1, format_2 = formats_pair
1779
1780 formats_info = []
1781 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1782 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1783
1784 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1785 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1786 for (i, fmt_info) in enumerate(formats_info):
551f9388 1787 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1788 formats_info.pop(i)
1789 continue
1790 for aud_vid in ['audio', 'video']:
f8d4ad9a 1791 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1792 if get_no_more[aud_vid]:
1793 formats_info.pop(i)
f5510afe 1794 break
f8d4ad9a 1795 get_no_more[aud_vid] = True
1796
1797 if len(formats_info) == 1:
1798 return formats_info[0]
1799
1800 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1801 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1802
1803 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1804 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1805
1806 output_ext = self.params.get('merge_output_format')
1807 if not output_ext:
1808 if the_only_video:
1809 output_ext = the_only_video['ext']
1810 elif the_only_audio and not video_fmts:
1811 output_ext = the_only_audio['ext']
1812 else:
1813 output_ext = 'mkv'
1814
1815 new_dict = {
1816 'requested_formats': formats_info,
1817 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1818 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1819 'ext': output_ext,
1820 }
1821
1822 if the_only_video:
1823 new_dict.update({
1824 'width': the_only_video.get('width'),
1825 'height': the_only_video.get('height'),
1826 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1827 'fps': the_only_video.get('fps'),
1828 'vcodec': the_only_video.get('vcodec'),
1829 'vbr': the_only_video.get('vbr'),
1830 'stretched_ratio': the_only_video.get('stretched_ratio'),
1831 })
1832
1833 if the_only_audio:
1834 new_dict.update({
1835 'acodec': the_only_audio.get('acodec'),
1836 'abr': the_only_audio.get('abr'),
1837 })
1838
1839 return new_dict
1840
e8e73840 1841 def _check_formats(formats):
981052c9 1842 if not check_formats:
1843 yield from formats
b5ac45b1 1844 return
e8e73840 1845 for f in formats:
1846 self.to_screen('[info] Testing format %s' % f['format_id'])
21cd8fae 1847 temp_file = tempfile.NamedTemporaryFile(
1848 suffix='.tmp', delete=False,
1849 dir=self.get_output_path('temp') or None)
1850 temp_file.close()
fe346461 1851 try:
981052c9 1852 success, _ = self.dl(temp_file.name, f, test=True)
1853 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1854 success = False
fe346461 1855 finally:
21cd8fae 1856 if os.path.exists(temp_file.name):
1857 try:
1858 os.remove(temp_file.name)
1859 except OSError:
1860 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
981052c9 1861 if success:
e8e73840 1862 yield f
1863 else:
1864 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1865
67134eab 1866 def _build_selector_function(selector):
909d24dd 1867 if isinstance(selector, list): # ,
67134eab JMF
1868 fs = [_build_selector_function(s) for s in selector]
1869
317f7ab6 1870 def selector_function(ctx):
67134eab 1871 for f in fs:
981052c9 1872 yield from f(ctx)
67134eab 1873 return selector_function
909d24dd 1874
1875 elif selector.type == GROUP: # ()
0130afb7 1876 selector_function = _build_selector_function(selector.selector)
909d24dd 1877
1878 elif selector.type == PICKFIRST: # /
67134eab JMF
1879 fs = [_build_selector_function(s) for s in selector.selector]
1880
317f7ab6 1881 def selector_function(ctx):
67134eab 1882 for f in fs:
317f7ab6 1883 picked_formats = list(f(ctx))
67134eab JMF
1884 if picked_formats:
1885 return picked_formats
1886 return []
67134eab 1887
981052c9 1888 elif selector.type == MERGE: # +
1889 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1890
1891 def selector_function(ctx):
1892 for pair in itertools.product(
1893 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1894 yield _merge(pair)
1895
909d24dd 1896 elif selector.type == SINGLE: # atom
598d185d 1897 format_spec = selector.selector or 'best'
909d24dd 1898
f8d4ad9a 1899 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1900 if format_spec == 'all':
1901 def selector_function(ctx):
981052c9 1902 yield from _check_formats(ctx['formats'])
f8d4ad9a 1903 elif format_spec == 'mergeall':
1904 def selector_function(ctx):
981052c9 1905 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1906 if not formats:
1907 return
921b76ca 1908 merged_format = formats[-1]
1909 for f in formats[-2::-1]:
f8d4ad9a 1910 merged_format = _merge((merged_format, f))
1911 yield merged_format
909d24dd 1912
1913 else:
e8e73840 1914 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1915 mobj = re.match(
1916 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1917 format_spec)
1918 if mobj is not None:
1919 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1920 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1921 format_type = (mobj.group('type') or [None])[0]
1922 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1923 format_modified = mobj.group('mod') is not None
909d24dd 1924
1925 format_fallback = not format_type and not format_modified # for b, w
8326b00a 1926 _filter_f = (
eff63539 1927 (lambda f: f.get('%scodec' % format_type) != 'none')
1928 if format_type and format_modified # bv*, ba*, wv*, wa*
1929 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1930 if format_type # bv, ba, wv, wa
1931 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1932 if not format_modified # b, w
8326b00a 1933 else lambda f: True) # b*, w*
1934 filter_f = lambda f: _filter_f(f) and (
1935 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
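# e.g. 'b'/'best' = best pre-merged format, 'bv' = best video-only format,
# 'bv*' = best format containing video; a '.N' suffix such as 'ba.2' picks the
# N-th best match.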
67134eab 1936 else:
909d24dd 1937 filter_f = ((lambda f: f.get('ext') == format_spec)
1938 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1939 else (lambda f: f.get('format_id') == format_spec)) # id
1940
1941 def selector_function(ctx):
1942 formats = list(ctx['formats'])
909d24dd 1943 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1944 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1945 # for extractors with incomplete formats (audio only (soundcloud)
1946 # or video only (imgur)) best/worst will fall back to
1947 # best/worst {video,audio}-only format
e8e73840 1948 matches = formats
981052c9 1949 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1950 try:
e8e73840 1951 yield matches[format_idx - 1]
981052c9 1952 except IndexError:
1953 return
083c9df9 1954
67134eab 1955 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1956
317f7ab6 S
1957 def final_selector(ctx):
1958 ctx_copy = copy.deepcopy(ctx)
67134eab 1959 for _filter in filters:
317f7ab6 S
1960 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1961 return selector_function(ctx_copy)
67134eab 1962 return final_selector
083c9df9 1963
67134eab 1964 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1965 try:
232541df 1966 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7 JMF
1967 except tokenize.TokenError:
1968 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1969
1970 class TokenIterator(object):
1971 def __init__(self, tokens):
1972 self.tokens = tokens
1973 self.counter = 0
1974
1975 def __iter__(self):
1976 return self
1977
1978 def __next__(self):
1979 if self.counter >= len(self.tokens):
1980 raise StopIteration()
1981 value = self.tokens[self.counter]
1982 self.counter += 1
1983 return value
1984
1985 next = __next__
1986
1987 def restore_last_token(self):
1988 self.counter -= 1
1989
1990 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1991 return _build_selector_function(parsed_selector)
a9c58ad9 1992
e5660ee6 JMF
1993 def _calc_headers(self, info_dict):
1994 res = std_headers.copy()
1995
1996 add_headers = info_dict.get('http_headers')
1997 if add_headers:
1998 res.update(add_headers)
1999
2000 cookies = self._calc_cookies(info_dict)
2001 if cookies:
2002 res['Cookie'] = cookies
2003
0016b84e S
2004 if 'X-Forwarded-For' not in res:
2005 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2006 if x_forwarded_for_ip:
2007 res['X-Forwarded-For'] = x_forwarded_for_ip
2008
e5660ee6 JMF
2009 return res
2010
2011 def _calc_cookies(self, info_dict):
5c2266df 2012 pr = sanitized_Request(info_dict['url'])
e5660ee6 2013 self.cookiejar.add_cookie_header(pr)
662435f7 2014 return pr.get_header('Cookie')
e5660ee6 2015
b0249bca 2016 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2017 thumbnails = info_dict.get('thumbnails')
2018 if thumbnails is None:
2019 thumbnail = info_dict.get('thumbnail')
2020 if thumbnail:
2021 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2022 if thumbnails:
2023 thumbnails.sort(key=lambda t: (
2024 t.get('preference') if t.get('preference') is not None else -1,
2025 t.get('width') if t.get('width') is not None else -1,
2026 t.get('height') if t.get('height') is not None else -1,
2027 t.get('id') if t.get('id') is not None else '',
2028 t.get('url')))
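# Thumbnails end up sorted worst-to-best, so the last entry is treated as the
# preferred one (thumbnails[-1] is used as 'thumbnail' further below).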
b0249bca 2029
0ba692ac 2030 def thumbnail_tester():
2031 if self.params.get('check_formats'):
cca80fe6 2032 test_all = True
2033 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 2034 else:
cca80fe6 2035 test_all = False
0ba692ac 2036 to_screen = self.write_debug
2037
2038 def test_thumbnail(t):
cca80fe6 2039 if not test_all and not t.get('_test_url'):
2040 return True
0ba692ac 2041 to_screen('Testing thumbnail %s' % t['id'])
2042 try:
2043 self.urlopen(HEADRequest(t['url']))
2044 except network_exceptions as err:
2045 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2046 t['id'], t['url'], error_to_compat_str(err)))
2047 return False
2048 return True
2049
2050 return test_thumbnail
b0249bca 2051
bc516a3f 2052 for i, t in enumerate(thumbnails):
bc516a3f 2053 if t.get('id') is None:
2054 t['id'] = '%d' % i
b0249bca 2055 if t.get('width') and t.get('height'):
2056 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2057 t['url'] = sanitize_url(t['url'])
0ba692ac 2058
2059 if self.params.get('check_formats') is not False:
2060 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2061 else:
2062 info_dict['thumbnails'] = thumbnails
bc516a3f 2063
dd82ffea JMF
2064 def process_video_result(self, info_dict, download=True):
2065 assert info_dict.get('_type', 'video') == 'video'
2066
bec1fad2 PH
2067 if 'id' not in info_dict:
2068 raise ExtractorError('Missing "id" field in extractor result')
2069 if 'title' not in info_dict:
1151c407 2070 raise ExtractorError('Missing "title" field in extractor result',
2071 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2072
c9969434 S
2073 def report_force_conversion(field, field_not, conversion):
2074 self.report_warning(
2075 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2076 % (field, field_not, conversion))
2077
2078 def sanitize_string_field(info, string_field):
2079 field = info.get(string_field)
2080 if field is None or isinstance(field, compat_str):
2081 return
2082 report_force_conversion(string_field, 'a string', 'string')
2083 info[string_field] = compat_str(field)
2084
2085 def sanitize_numeric_fields(info):
2086 for numeric_field in self._NUMERIC_FIELDS:
2087 field = info.get(numeric_field)
2088 if field is None or isinstance(field, compat_numeric_types):
2089 continue
2090 report_force_conversion(numeric_field, 'numeric', 'int')
2091 info[numeric_field] = int_or_none(field)
2092
2093 sanitize_string_field(info_dict, 'id')
2094 sanitize_numeric_fields(info_dict)
be6217b2 2095
dd82ffea JMF
2096 if 'playlist' not in info_dict:
2097 # It isn't part of a playlist
2098 info_dict['playlist'] = None
2099 info_dict['playlist_index'] = None
2100
bc516a3f 2101 self._sanitize_thumbnails(info_dict)
d5519808 2102
536a55da 2103 thumbnail = info_dict.get('thumbnail')
bc516a3f 2104 thumbnails = info_dict.get('thumbnails')
536a55da S
2105 if thumbnail:
2106 info_dict['thumbnail'] = sanitize_url(thumbnail)
2107 elif thumbnails:
d5519808 PH
2108 info_dict['thumbnail'] = thumbnails[-1]['url']
2109
ae30b840 2110 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b PH
2111 info_dict['display_id'] = info_dict['id']
2112
10db0d2f 2113 for ts_key, date_key in (
2114 ('timestamp', 'upload_date'),
2115 ('release_timestamp', 'release_date'),
2116 ):
2117 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2118 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2119 # see http://bugs.python.org/issue1646728)
2120 try:
2121 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2122 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2123 except (ValueError, OverflowError, OSError):
2124 pass
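# e.g. a 'timestamp' of 1609459200 yields upload_date '20210101' (UTC)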
9d2ecdbc 2125
ae30b840 2126 live_keys = ('is_live', 'was_live')
2127 live_status = info_dict.get('live_status')
2128 if live_status is None:
2129 for key in live_keys:
2130 if info_dict.get(key) is False:
2131 continue
2132 if info_dict.get(key):
2133 live_status = key
2134 break
2135 if all(info_dict.get(key) is False for key in live_keys):
2136 live_status = 'not_live'
2137 if live_status:
2138 info_dict['live_status'] = live_status
2139 for key in live_keys:
2140 if info_dict.get(key) is None:
2141 info_dict[key] = (live_status == key)
2142
33d2fc2f S
2143 # Auto generate title fields corresponding to the *_number fields when missing
2144 # in order to always have clean titles. This is very common for TV series.
2145 for field in ('chapter', 'season', 'episode'):
2146 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2147 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
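# e.g. episode_number=3 with no 'episode' set yields episode = 'Episode 3'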
2148
05108a49 S
2149 for cc_kind in ('subtitles', 'automatic_captions'):
2150 cc = info_dict.get(cc_kind)
2151 if cc:
2152 for _, subtitle in cc.items():
2153 for subtitle_format in subtitle:
2154 if subtitle_format.get('url'):
2155 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2156 if subtitle_format.get('ext') is None:
2157 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2158
2159 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2160 subtitles = info_dict.get('subtitles')
4bba3716 2161
360e1ca5 2162 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2163 info_dict['id'], subtitles, automatic_captions)
a504ced0 2164
dd82ffea JMF
2165 # We now pick which formats have to be downloaded
2166 if info_dict.get('formats') is None:
2167 # There's only one format available
2168 formats = [info_dict]
2169 else:
2170 formats = info_dict['formats']
2171
e0493e90 2172 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2173 if not self.params.get('allow_unplayable_formats'):
2174 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2175
db95dc13 2176 if not formats:
1151c407 2177 self.raise_no_formats(info_dict)
db95dc13 2178
73af5cc8 S
2179 def is_wellformed(f):
2180 url = f.get('url')
a5ac0c47 2181 if not url:
73af5cc8 S
2182 self.report_warning(
2183 '"url" field is missing or empty - skipping format, '
2184 'there is an error in extractor')
a5ac0c47 S
2185 return False
2186 if isinstance(url, bytes):
2187 sanitize_string_field(f, 'url')
2188 return True
73af5cc8 S
2189
2190 # Filter out malformed formats for better extraction robustness
2191 formats = list(filter(is_wellformed, formats))
2192
181c7053 S
2193 formats_dict = {}
2194
dd82ffea 2195 # We check that all the formats have the format and format_id fields
db95dc13 2196 for i, format in enumerate(formats):
c9969434 S
2197 sanitize_string_field(format, 'format_id')
2198 sanitize_numeric_fields(format)
dcf77cf1 2199 format['url'] = sanitize_url(format['url'])
e74e3b63 2200 if not format.get('format_id'):
8016c922 2201 format['format_id'] = compat_str(i)
e2effb08 S
2202 else:
2203 # Sanitize format_id from characters used in format selector expression
ec85ded8 2204 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053 S
2205 format_id = format['format_id']
2206 if format_id not in formats_dict:
2207 formats_dict[format_id] = []
2208 formats_dict[format_id].append(format)
2209
2210 # Make sure all formats have unique format_id
2211 for format_id, ambiguous_formats in formats_dict.items():
2212 if len(ambiguous_formats) > 1:
2213 for i, format in enumerate(ambiguous_formats):
2214 format['format_id'] = '%s-%d' % (format_id, i)
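# e.g. two formats both reporting format_id 'hls' become 'hls-0' and 'hls-1'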
2215
2216 for i, format in enumerate(formats):
8c51aa65 2217 if format.get('format') is None:
6febd1c1 2218 format['format'] = '{id} - {res}{note}'.format(
8c51aa65 JMF
2219 id=format['format_id'],
2220 res=self.format_resolution(format),
b868936c 2221 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2222 )
c1002e96 2223 # Automatically determine file extension if missing
5b1d8575 2224 if format.get('ext') is None:
cce929ea 2225 format['ext'] = determine_ext(format['url']).lower()
b5559424 S
2226 # Automatically determine protocol if missing (useful for format
2227 # selection purposes)
6f0be937 2228 if format.get('protocol') is None:
b5559424 2229 format['protocol'] = determine_protocol(format)
e5660ee6 JMF
2230 # Add HTTP headers, so that external programs can use them from the
2231 # json output
2232 full_format_info = info_dict.copy()
2233 full_format_info.update(format)
2234 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e S
2235 # Remove private housekeeping stuff
2236 if '__x_forwarded_for_ip' in info_dict:
2237 del info_dict['__x_forwarded_for_ip']
dd82ffea 2238
4bcc7bd1 2239 # TODO Central sorting goes here
99e206d5 2240
88acdbc2 2241 if not formats or formats[0] is not info_dict:
b3d9ef88 JMF
2242 # only set the 'formats' fields if the original info_dict lists them
2243 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2244 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2245 # which can't be exported to json
b3d9ef88 2246 info_dict['formats'] = formats
4ec82a72 2247
2248 info_dict, _ = self.pre_process(info_dict)
2249
b7b04c78 2250 if self.params.get('list_thumbnails'):
2251 self.list_thumbnails(info_dict)
2252 if self.params.get('listformats'):
86c66b2d 2253 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2254 self.to_screen('%s has no formats' % info_dict['id'])
2255 else:
2256 self.list_formats(info_dict)
b7b04c78 2257 if self.params.get('listsubtitles'):
2258 if 'automatic_captions' in info_dict:
2259 self.list_subtitles(
2260 info_dict['id'], automatic_captions, 'automatic captions')
2261 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2262 list_only = self.params.get('simulate') is None and (
2263 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2264 if list_only:
b7b04c78 2265 # Without this printing, -F --print-json will not work
169dbde9 2266 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2267 return
2268
187986a8 2269 format_selector = self.format_selector
2270 if format_selector is None:
0017d9ad 2271 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2272 self.write_debug('Default format spec: %s' % req_format)
187986a8 2273 format_selector = self.build_format_selector(req_format)
317f7ab6 S
2274
2275 # While in format selection we may need to have access to the original
2276 # format set in order to calculate some metrics or do some processing.
2277 # For now we need to be able to guess whether original formats provided
2278 # by extractor are incomplete or not (i.e. whether extractor provides only
2279 # video-only or audio-only formats) for proper formats selection for
2280 # extractors with such incomplete formats (see
067aa17e 2281 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6 S
2282 # Since formats may be filtered during format selection and may not match
2283 # the original formats the results may be incorrect. Thus original formats
2284 # or pre-calculated metrics should be passed to format selection routines
2285 # as well.
2286 # We will pass a context object containing all necessary additional data
2287 # instead of just formats.
2288 # This fixes incorrect format selection issue (see
067aa17e 2289 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2290 incomplete_formats = (
317f7ab6 2291 # All formats are video-only or
3089bc74 2292 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2293 # all formats are audio-only
3089bc74 2294 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6 S
2295
2296 ctx = {
2297 'formats': formats,
2298 'incomplete_formats': incomplete_formats,
2299 }
2300
2301 formats_to_download = list(format_selector(ctx))
dd82ffea 2302 if not formats_to_download:
b7da73eb 2303 if not self.params.get('ignore_no_formats_error'):
1151c407 2304 raise ExtractorError('Requested format is not available', expected=True,
2305 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2306 else:
2307 self.report_warning('Requested format is not available')
4513a41a A
2308 # Process what we can, even without any available formats.
2309 self.process_info(dict(info_dict))
b7da73eb 2310 elif download:
2311 self.to_screen(
07cce701 2312 '[info] %s: Downloading %d format(s): %s' % (
2313 info_dict['id'], len(formats_to_download),
2314 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2315 for fmt in formats_to_download:
dd82ffea 2316 new_info = dict(info_dict)
4ec82a72 2317 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2318 new_info['__original_infodict'] = info_dict
b7da73eb 2319 new_info.update(fmt)
dd82ffea JMF
2320 self.process_info(new_info)
2321 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2322 if formats_to_download:
2323 info_dict.update(formats_to_download[-1])
dd82ffea JMF
2324 return info_dict
2325
98c70d6f 2326 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2327 """Select the requested subtitles and their format"""
98c70d6f JMF
2328 available_subs = {}
2329 if normal_subtitles and self.params.get('writesubtitles'):
2330 available_subs.update(normal_subtitles)
2331 if automatic_captions and self.params.get('writeautomaticsub'):
2332 for lang, cap_info in automatic_captions.items():
360e1ca5 JMF
2333 if lang not in available_subs:
2334 available_subs[lang] = cap_info
2335
4d171848 JMF
2336 if (not self.params.get('writesubtitles') and not
2337 self.params.get('writeautomaticsub') or not
2338 available_subs):
2339 return None
a504ced0 2340
c32b0aab 2341 all_sub_langs = available_subs.keys()
a504ced0 2342 if self.params.get('allsubtitles', False):
c32b0aab 2343 requested_langs = all_sub_langs
2344 elif self.params.get('subtitleslangs', False):
2345 requested_langs = set()
2346 for lang in self.params.get('subtitleslangs'):
2347 if lang == 'all':
2348 requested_langs.update(all_sub_langs)
2349 continue
2350 discard = lang[0] == '-'
2351 if discard:
2352 lang = lang[1:]
2353 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2354 if discard:
2355 for lang in current_langs:
2356 requested_langs.discard(lang)
2357 else:
2358 requested_langs.update(current_langs)
2359 elif 'en' in available_subs:
2360 requested_langs = ['en']
a504ced0 2361 else:
c32b0aab 2362 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2363 if requested_langs:
2364 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
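# Illustrative example: subtitleslangs=['all', '-live_chat'] selects every available
# language except 'live_chat'; a pattern such as 'en.*' matches en, en-US, en-GB, ...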
a504ced0 JMF
2365
2366 formats_query = self.params.get('subtitlesformat', 'best')
2367 formats_preference = formats_query.split('/') if formats_query else []
2368 subs = {}
2369 for lang in requested_langs:
2370 formats = available_subs.get(lang)
2371 if formats is None:
2372 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2373 continue
a504ced0 JMF
2374 for ext in formats_preference:
2375 if ext == 'best':
2376 f = formats[-1]
2377 break
2378 matches = list(filter(lambda f: f['ext'] == ext, formats))
2379 if matches:
2380 f = matches[-1]
2381 break
2382 else:
2383 f = formats[-1]
2384 self.report_warning(
2385 'No subtitle format found matching "%s" for language %s, '
2386 'using %s' % (formats_query, lang, f['ext']))
2387 subs[lang] = f
2388 return subs
2389
d06daf23 2390 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2391 def print_mandatory(field, actual_field=None):
2392 if actual_field is None:
2393 actual_field = field
d06daf23 2394 if (self.params.get('force%s' % field, False)
53c18592 2395 and (not incomplete or info_dict.get(actual_field) is not None)):
2396 self.to_stdout(info_dict[actual_field])
d06daf23 S
2397
2398 def print_optional(field):
2399 if (self.params.get('force%s' % field, False)
2400 and info_dict.get(field) is not None):
2401 self.to_stdout(info_dict[field])
2402
53c18592 2403 info_dict = info_dict.copy()
2404 if filename is not None:
2405 info_dict['filename'] = filename
2406 if info_dict.get('requested_formats') is not None:
2407 # For RTMP URLs, also include the playpath
2408 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2409 elif 'url' in info_dict:
2410 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2411
2b8a2973 2412 if self.params.get('forceprint') or self.params.get('forcejson'):
2413 self.post_extract(info_dict)
53c18592 2414 for tmpl in self.params.get('forceprint', []):
2415 if re.match(r'\w+$', tmpl):
2416 tmpl = '%({})s'.format(tmpl)
2417 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
901130bb 2418 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
53c18592 2419
d06daf23 S
2420 print_mandatory('title')
2421 print_mandatory('id')
53c18592 2422 print_mandatory('url', 'urls')
d06daf23 S
2423 print_optional('thumbnail')
2424 print_optional('description')
53c18592 2425 print_optional('filename')
b868936c 2426 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23 S
2427 self.to_stdout(formatSeconds(info_dict['duration']))
2428 print_mandatory('format')
53c18592 2429
2b8a2973 2430 if self.params.get('forcejson'):
6e84b215 2431 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2432
e8e73840 2433 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2434 if not info.get('url'):
1151c407 2435 self.raise_no_formats(info, True)
e8e73840 2436
2437 if test:
2438 verbose = self.params.get('verbose')
2439 params = {
2440 'test': True,
2441 'quiet': not verbose,
2442 'verbose': verbose,
2443 'noprogress': not verbose,
2444 'nopart': True,
2445 'skip_unavailable_fragments': False,
2446 'keep_fragments': False,
2447 'overwrites': True,
2448 '_no_ytdl_file': True,
2449 }
2450 else:
2451 params = self.params
96fccc10 2452 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2453 if not test:
2454 for ph in self._progress_hooks:
2455 fd.add_progress_hook(ph)
18e674b4 2456 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2457 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2458 new_info = dict(info)
2459 if new_info.get('http_headers') is None:
2460 new_info['http_headers'] = self._calc_headers(new_info)
2461 return fd.download(name, new_info, subtitle)
2462
8222d8de JMF
2463 def process_info(self, info_dict):
2464 """Process a single resolved IE result."""
2465
2466 assert info_dict.get('_type', 'video') == 'video'
fd288278 PH
2467
2468 max_downloads = self.params.get('max_downloads')
2469 if max_downloads is not None:
2470 if self._num_downloads >= int(max_downloads):
2471 raise MaxDownloadsReached()
8222d8de 2472
d06daf23 2473 # TODO: backward compatibility, to be removed
8222d8de 2474 info_dict['fulltitle'] = info_dict['title']
8222d8de 2475
4513a41a 2476 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de JMF
2477 info_dict['format'] = info_dict['ext']
2478
c77495e3 2479 if self._match_entry(info_dict) is not None:
8222d8de JMF
2480 return
2481
277d6ff5 2482 self.post_extract(info_dict)
fd288278 2483 self._num_downloads += 1
8222d8de 2484
dcf64d43 2485 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2486 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2487 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2488 files_to_move = {}
8222d8de JMF
2489
2490 # Forced printings
4513a41a 2491 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2492
b7b04c78 2493 if self.params.get('simulate'):
2d30509f 2494 if self.params.get('force_write_download_archive', False):
2495 self.record_download_archive(info_dict)
2496
2497 # Do nothing else if in simulate mode
8222d8de JMF
2498 return
2499
de6000d9 2500 if full_filename is None:
8222d8de JMF
2501 return
2502
e92caff5 2503 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2504 return
e92caff5 2505 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de JMF
2506 return
2507
2508 if self.params.get('writedescription', False):
de6000d9 2509 descfn = self.prepare_filename(info_dict, 'description')
e92caff5 2510 if not self._ensure_dir_exists(encodeFilename(descfn)):
0202b52a 2511 return
0c3d0f51 2512 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2513 self.to_screen('[info] Video description is already present')
f00fd51d JMF
2514 elif info_dict.get('description') is None:
2515 self.report_warning('There\'s no description to write.')
7b6fefc9 PH
2516 else:
2517 try:
6febd1c1 2518 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9 PH
2519 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2520 descfile.write(info_dict['description'])
7b6fefc9 2521 except (OSError, IOError):
6febd1c1 2522 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2523 return
8222d8de 2524
1fb07d10 2525 if self.params.get('writeannotations', False):
de6000d9 2526 annofn = self.prepare_filename(info_dict, 'annotation')
e92caff5 2527 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2528 return
0c3d0f51 2529 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2530 self.to_screen('[info] Video annotations are already present')
ffddb112 RA
2531 elif not info_dict.get('annotations'):
2532 self.report_warning('There are no annotations to write.')
7b6fefc9 PH
2533 else:
2534 try:
6febd1c1 2535 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9 PH
2536 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2537 annofile.write(info_dict['annotations'])
2538 except (KeyError, TypeError):
6febd1c1 2539 self.report_warning('There are no annotations to write.')
7b6fefc9 2540 except (OSError, IOError):
6febd1c1 2541 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2542 return
1fb07d10 2543
c4a91be7 2544 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2545 self.params.get('writeautomaticsub')])
c4a91be7 2546
c84dd8a9 2547 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de JMF
2548 # subtitle download errors are already handled in the relevant IE;
2549 # that way it will silently go on when used with an IE that does not support subtitles
c84dd8a9 2550 subtitles = info_dict['requested_subtitles']
fa57af1e 2551 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0 JMF
2552 for sub_lang, sub_info in subtitles.items():
2553 sub_format = sub_info['ext']
56d868db 2554 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2555 sub_filename_final = subtitles_filename(
2556 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2557 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2558 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
dcf64d43 2559 sub_info['filepath'] = sub_filename
0202b52a 2560 files_to_move[sub_filename] = sub_filename_final
a504ced0 2561 else:
0c9df79e 2562 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c RA
2563 if sub_info.get('data') is not None:
2564 try:
2565 # Use newline='' to prevent conversion of newline characters
067aa17e 2566 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c RA
2567 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2568 subfile.write(sub_info['data'])
dcf64d43 2569 sub_info['filepath'] = sub_filename
0202b52a 2570 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c RA
2571 except (OSError, IOError):
2572 self.report_error('Cannot write subtitles file ' + sub_filename)
2573 return
7b6fefc9 2574 else:
5ff1bc0c 2575 try:
e8e73840 2576 self.dl(sub_filename, sub_info.copy(), subtitle=True)
dcf64d43 2577 sub_info['filepath'] = sub_filename
0202b52a 2578 files_to_move[sub_filename] = sub_filename_final
fe346461 2579 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
5ff1bc0c RA
2580 self.report_warning('Unable to download subtitle for "%s": %s' %
2581 (sub_lang, error_to_compat_str(err)))
2582 continue
8222d8de 2583
8222d8de 2584 if self.params.get('writeinfojson', False):
de6000d9 2585 infofn = self.prepare_filename(info_dict, 'infojson')
e92caff5 2586 if not self._ensure_dir_exists(encodeFilename(infofn)):
0202b52a 2587 return
0c3d0f51 2588 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2589 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2590 else:
66c935fb 2591 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2592 try:
8012d892 2593 write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
7b6fefc9 2594 except (OSError, IOError):
66c935fb 2595 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2596 return
de6000d9 2597 info_dict['__infojson_filename'] = infofn
8222d8de 2598
56d868db 2599 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2600 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2601 thumb_filename = replace_extension(
2602 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
dcf64d43 2603 files_to_move[thumb_filename_temp] = thumb_filename
8222d8de 2604
732044af 2605 # Write internet shortcut files
2606 url_link = webloc_link = desktop_link = False
2607 if self.params.get('writelink', False):
2608 if sys.platform == "darwin": # macOS.
2609 webloc_link = True
2610 elif sys.platform.startswith("linux"):
2611 desktop_link = True
2612 else: # if sys.platform in ['win32', 'cygwin']:
2613 url_link = True
2614 if self.params.get('writeurllink', False):
2615 url_link = True
2616 if self.params.get('writewebloclink', False):
2617 webloc_link = True
2618 if self.params.get('writedesktoplink', False):
2619 desktop_link = True
2620
2621 if url_link or webloc_link or desktop_link:
2622 if 'webpage_url' not in info_dict:
2623 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2624 return
2625 ascii_url = iri_to_uri(info_dict['webpage_url'])
2626
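# Helper for the three shortcut flavours (.url, .webloc, .desktop). 'template' is one of the
# DOT_*_LINK_TEMPLATE strings from utils; .desktop files additionally embed the base filename.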
2627 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2628 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2629 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2630 self.to_screen('[info] Internet shortcut is already present')
2631 else:
2632 try:
2633 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2634 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2635 template_vars = {'url': ascii_url}
2636 if embed_filename:
2637 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2638 linkfile.write(template % template_vars)
2639 except (OSError, IOError):
2640 self.report_error('Cannot write internet shortcut ' + linkfn)
2641 return False
2642 return True
2643
2644 if url_link:
2645 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2646 return
2647 if webloc_link:
2648 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2649 return
2650 if desktop_link:
2651 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2652 return
2653
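# Run the postprocessors registered for the 'before_dl' stage; they may add or rename
# files, which is why files_to_move is threaded through and handed back.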
56d868db 2654 try:
2655 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2656 except PostProcessingError as err:
2657 self.report_error('Preprocessing: %s' % str(err))
2658 return
2659
732044af 2660 must_record_download_archive = False
56d868db 2661 if self.params.get('skip_download', False):
2662 info_dict['filepath'] = temp_filename
2663 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2664 info_dict['__files_to_move'] = files_to_move
2665 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2666 else:
2667 # Download
b868936c 2668 info_dict.setdefault('__postprocessors', [])
4340deca 2669 try:
0202b52a 2670
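# existing_file(): look for an already-finished download, also under the post-conversion
# extension (final_ext). If overwrites are enabled, any such copies are deleted and None is
# returned so a fresh download happens; otherwise the first existing file is reused.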
6b591b29 2671 def existing_file(*filepaths):
2672 ext = info_dict.get('ext')
2673 final_ext = self.params.get('final_ext', ext)
2674 existing_files = []
2675 for file in orderedSet(filepaths):
2676 if final_ext != ext:
2677 converted = replace_extension(file, final_ext, ext)
2678 if os.path.exists(encodeFilename(converted)):
2679 existing_files.append(converted)
2680 if os.path.exists(encodeFilename(file)):
2681 existing_files.append(file)
2682
2683 if not existing_files or self.params.get('overwrites', False):
2684 for file in orderedSet(existing_files):
2685 self.report_file_delete(file)
2686 os.remove(encodeFilename(file))
2687 return None
2688
6b591b29 2689 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2690 return existing_files[0]
0202b52a 2691
2692 success = True
4340deca 2693 if info_dict.get('requested_formats') is not None:
81cd954a
S
2694
2695 def compatible_formats(formats):
d03cfdce 2696 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2697 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2698 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2699 if len(video_formats) > 2 or len(audio_formats) > 2:
2700 return False
2701
81cd954a 2702 # Check extension
d03cfdce 2703 exts = set(format.get('ext') for format in formats)
2704 COMPATIBLE_EXTS = (
2705 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2706 set(('webm',)),
2707 )
2708 for ext_sets in COMPATIBLE_EXTS:
2709 if ext_sets.issuperset(exts):
2710 return True
81cd954a
S
2711 # TODO: Check acodec/vcodec
2712 return False
2713
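# Example of the check above: an mp4 video-only stream plus an m4a audio-only stream fall in
# the same extension family and can be merged as-is, whereas mp4 video + webm audio cannot
# and will be merged into mkv below unless --merge-output-format says otherwise.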
2714 requested_formats = info_dict['requested_formats']
0202b52a 2715 old_ext = info_dict['ext']
3b297919 2716 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2717 info_dict['ext'] = 'mkv'
2718 self.report_warning(
2719 'Requested formats are incompatible for merge and will be merged into mkv.')
124bc071 2720 new_ext = info_dict['ext']
0202b52a 2721
124bc071 2722 def correct_ext(filename, ext=new_ext):
96fccc10 2723 if filename == '-':
2724 return filename
0202b52a 2725 filename_real_ext = os.path.splitext(filename)[1][1:]
2726 filename_wo_ext = (
2727 os.path.splitext(filename)[0]
124bc071 2728 if filename_real_ext in (old_ext, new_ext)
0202b52a 2729 else filename)
124bc071 2730 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2731
38c6902b 2732 # Ensure filename always has a correct extension for successful merge
0202b52a 2733 full_filename = correct_ext(full_filename)
2734 temp_filename = correct_ext(temp_filename)
2735 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2736 info_dict['__real_download'] = False
18e674b4 2737
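# If every requested format uses the same protocol, expose it on the top-level dict so a
# single (possibly ffmpeg-based) downloader can be chosen for the whole merge.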
2738 _protocols = set(determine_protocol(f) for f in requested_formats)
dbf5416a 2739 if len(_protocols) == 1: # All requested formats have same protocol
18e674b4 2740 info_dict['protocol'] = _protocols.pop()
dbf5416a 2741 directly_mergable = FFmpegFD.can_merge_formats(info_dict)
2742 if dl_filename is not None:
6c7274ec 2743 self.report_file_already_downloaded(dl_filename)
96fccc10 2744 elif (directly_mergable and get_suitable_downloader(
a46a815b 2745 info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
dbf5416a 2746 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2747 success, real_download = self.dl(temp_filename, info_dict)
2748 info_dict['__real_download'] = real_download
18e674b4 2749 else:
2750 downloaded = []
2751 merger = FFmpegMergerPP(self)
2752 if self.params.get('allow_unplayable_formats'):
2753 self.report_warning(
2754 'You have requested merging of multiple formats '
2755 'while also allowing unplayable formats to be downloaded. '
2756 'The formats won\'t be merged to prevent data corruption.')
2757 elif not merger.available:
2758 self.report_warning(
2759 'You have requested merging of multiple formats but ffmpeg is not installed. '
2760 'The formats won\'t be merged.')
2761
96fccc10 2762 if temp_filename == '-':
2763 reason = ('using a downloader other than ffmpeg' if directly_mergable
2764 else 'but the formats are incompatible for simultaneous download' if merger.available
2765 else 'but ffmpeg is not installed')
2766 self.report_warning(
2767 f'You have requested downloading multiple formats to stdout {reason}. '
2768 'The formats will be streamed one after the other')
2769 fname = temp_filename
dbf5416a 2770 for f in requested_formats:
2771 new_info = dict(info_dict)
2772 del new_info['requested_formats']
2773 new_info.update(f)
96fccc10 2774 if temp_filename != '-':
124bc071 2775 fname = prepend_extension(
2776 correct_ext(temp_filename, new_info['ext']),
2777 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2778 if not self._ensure_dir_exists(fname):
2779 return
2780 downloaded.append(fname)
dbf5416a 2781 partial_success, real_download = self.dl(fname, new_info)
2782 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2783 success = success and partial_success
2784 if merger.available and not self.params.get('allow_unplayable_formats'):
2785 info_dict['__postprocessors'].append(merger)
2786 info_dict['__files_to_merge'] = downloaded
2787 # Even if nothing new was downloaded, the merge itself only happens now
2788 info_dict['__real_download'] = True
2789 else:
2790 for file in downloaded:
2791 files_to_move[file] = None
4340deca
P
2792 else:
2793 # Just a single file
0202b52a 2794 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2795 if dl_filename is None or dl_filename == temp_filename:
2796 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2797 # So we should try to resume the download
e8e73840 2798 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2799 info_dict['__real_download'] = real_download
6c7274ec 2800 else:
2801 self.report_file_already_downloaded(dl_filename)
0202b52a 2802
0202b52a 2803 dl_filename = dl_filename or temp_filename
c571435f 2804 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2805
3158150c 2806 except network_exceptions as err:
7960b056 2807 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2808 return
2809 except (OSError, IOError) as err:
2810 raise UnavailableVideoError(err)
2811 except (ContentTooShortError, ) as err:
2812 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2813 return
8222d8de 2814
de6000d9 2815 if success and full_filename != '-':
f17f8651 2816
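# fixup(): depending on the --fixup policy, either queue ffmpeg-based repairs (stretched
# aspect ratio, DASH m4a container, malformed AAC/timestamps/duration) or just warn.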
fd7cfb64 2817 def fixup():
2818 do_fixup = True
2819 fixup_policy = self.params.get('fixup')
2820 vid = info_dict['id']
2821
2822 if fixup_policy in ('ignore', 'never'):
2823 return
2824 elif fixup_policy == 'warn':
2825 do_fixup = False
f89b3e2d 2826 elif fixup_policy != 'force':
2827 assert fixup_policy in ('detect_or_warn', None)
2828 if not info_dict.get('__real_download'):
2829 do_fixup = False
fd7cfb64 2830
2831 def ffmpeg_fixup(cndn, msg, cls):
2832 if not cndn:
2833 return
2834 if not do_fixup:
2835 self.report_warning(f'{vid}: {msg}')
2836 return
2837 pp = cls(self)
2838 if pp.available:
2839 info_dict['__postprocessors'].append(pp)
2840 else:
2841 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2842
2843 stretched_ratio = info_dict.get('stretched_ratio')
2844 ffmpeg_fixup(
2845 stretched_ratio not in (1, None),
2846 f'Non-uniform pixel ratio {stretched_ratio}',
2847 FFmpegFixupStretchedPP)
2848
2849 ffmpeg_fixup(
2850 (info_dict.get('requested_formats') is None
2851 and info_dict.get('container') == 'm4a_dash'
2852 and info_dict.get('ext') == 'm4a'),
2853 'writing DASH m4a. Only some players support this container',
2854 FFmpegFixupM4aPP)
2855
2856 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2857 if 'protocol' in info_dict else None)
2858 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2859 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2860 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2861
2862 fixup()
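# Post-processing proper: the per-download postprocessors queued above (merger, fixups)
# run first, then the user's 'post_process' stage, and finally the files are moved to
# their final location (see post_process() below).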
8222d8de 2863 try:
23c1a667 2864 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2865 except PostProcessingError as err:
2866 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2867 return
ab8e5e51
AM
2868 try:
2869 for ph in self._post_hooks:
23c1a667 2870 ph(info_dict['filepath'])
ab8e5e51
AM
2871 except Exception as err:
2872 self.report_error('post hooks: %s' % str(err))
2873 return
2d30509f 2874 must_record_download_archive = True
2875
2876 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2877 self.record_download_archive(info_dict)
c3e6ffba 2878 max_downloads = self.params.get('max_downloads')
2879 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2880 raise MaxDownloadsReached()
8222d8de
JMF
2881
2882 def download(self, url_list):
2883 """Download a given list of URLs."""
de6000d9 2884 outtmpl = self.outtmpl_dict['default']
3089bc74
S
2885 if (len(url_list) > 1
2886 and outtmpl != '-'
2887 and '%' not in outtmpl
2888 and self.params.get('max_downloads') != 1):
acd69589 2889 raise SameFileError(outtmpl)
8222d8de
JMF
2890
2891 for url in url_list:
2892 try:
5f6a1245 2893 # It also downloads the videos
61aa5ba3
S
2894 res = self.extract_info(
2895 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2896 except UnavailableVideoError:
6febd1c1 2897 self.report_error('unable to download video')
8222d8de 2898 except MaxDownloadsReached:
8f18aca8 2899 self.to_screen('[info] Maximum number of downloads reached')
8b0d7497 2900 raise
2901 except ExistingVideoReached:
8f18aca8 2902 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2903 raise
2904 except RejectedVideoReached:
8f18aca8 2905 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
8222d8de 2906 raise
63e0be34
PH
2907 else:
2908 if self.params.get('dump_single_json', False):
277d6ff5 2909 self.post_extract(res)
6e84b215 2910 self.to_stdout(json.dumps(self.sanitize_info(res)))
8222d8de
JMF
2911
2912 return self._download_retcode
2913
1dcc4c0c 2914 def download_with_info_file(self, info_filename):
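# Re-runs a download from a previously written .info.json (--load-info-json). If processing
# the stored info fails, it falls back to a normal download of the recorded webpage_url.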
31bd3925
JMF
2915 with contextlib.closing(fileinput.FileInput(
2916 [info_filename], mode='r',
2917 openhook=fileinput.hook_encoded('utf-8'))) as f:
2918 # FileInput doesn't have a read method, we can't call json.load
8012d892 2919 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898
JMF
2920 try:
2921 self.process_ie_result(info, download=True)
d3f62c19 2922 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898
JMF
2923 webpage_url = info.get('webpage_url')
2924 if webpage_url is not None:
6febd1c1 2925 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
2926 return self.download([webpage_url])
2927 else:
2928 raise
2929 return self._download_retcode
1dcc4c0c 2930
cb202fd2 2931 @staticmethod
8012d892 2932 def sanitize_info(info_dict, remove_private_keys=False):
2933 ''' Sanitize the infodict for converting to json '''
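# With remove_private_keys=True this also drops internal/underscored fields so the result
# is safe to serialize, e.g. (hypothetical call):
#     json.dumps(YoutubeDL.sanitize_info(info, remove_private_keys=True))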
3ad56b42 2934 if info_dict is None:
2935 return info_dict
6e84b215 2936 info_dict.setdefault('epoch', int(time.time()))
2937 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 2938 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
8012d892 2939 if remove_private_keys:
6e84b215 2940 remove_keys |= {
2941 'requested_formats', 'requested_subtitles', 'requested_entries',
2942 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2943 }
ae8f99e6 2944 empty_values = (None, {}, [], set(), tuple())
2945 reject = lambda k, v: k not in keep_keys and (
2946 k.startswith('_') or k in remove_keys or v in empty_values)
2947 else:
ae8f99e6 2948 reject = lambda k, v: k in remove_keys
5226731e 2949 filter_fn = lambda obj: (
b0249bca 2950 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2951 else obj if not isinstance(obj, dict)
ae8f99e6 2952 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2953 return filter_fn(info_dict)
cb202fd2 2954
8012d892 2955 @staticmethod
2956 def filter_requested_info(info_dict, actually_filter=True):
2957 ''' Alias of sanitize_info for backward compatibility '''
2958 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2959
dcf64d43 2960 def run_pp(self, pp, infodict):
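# Each postprocessor returns (files_to_delete, infodict). With -k/--keepvideo those files
# are only registered in __files_to_move (empty destination); otherwise they are deleted
# and dropped from the pending-move map.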
5bfa4862 2961 files_to_delete = []
dcf64d43 2962 if '__files_to_move' not in infodict:
2963 infodict['__files_to_move'] = {}
af819c21 2964 files_to_delete, infodict = pp.run(infodict)
5bfa4862 2965 if not files_to_delete:
dcf64d43 2966 return infodict
5bfa4862 2967
2968 if self.params.get('keepvideo', False):
2969 for f in files_to_delete:
dcf64d43 2970 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2971 else:
2972 for old_filename in set(files_to_delete):
2973 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2974 try:
2975 os.remove(encodeFilename(old_filename))
2976 except (IOError, OSError):
2977 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2978 if old_filename in infodict['__files_to_move']:
2979 del infodict['__files_to_move'][old_filename]
2980 return infodict
5bfa4862 2981
277d6ff5 2982 @staticmethod
2983 def post_extract(info_dict):
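# Merge fields that the extractor deferred via '__post_extractor' (a callable) into the
# info dict and its preserved original copy; recurses into playlist entries.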
2984 def actual_post_extract(info_dict):
2985 if info_dict.get('_type') in ('playlist', 'multi_video'):
2986 for video_dict in info_dict.get('entries', {}):
b050d210 2987 actual_post_extract(video_dict or {})
277d6ff5 2988 return
2989
07cce701 2990 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2991 extra = post_extractor().items()
2992 info_dict.update(extra)
07cce701 2993 info_dict.pop('__post_extractor', None)
277d6ff5 2994
4ec82a72 2995 original_infodict = info_dict.get('__original_infodict') or {}
2996 original_infodict.update(extra)
2997 original_infodict.pop('__post_extractor', None)
2998
b050d210 2999 actual_post_extract(info_dict or {})
277d6ff5 3000
56d868db 3001 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3002 info = dict(ie_info)
56d868db 3003 info['__files_to_move'] = files_to_move or {}
3004 for pp in self._pps[key]:
dcf64d43 3005 info = self.run_pp(pp, info)
56d868db 3006 return info, info.pop('__files_to_move', None)
5bfa4862 3007
dcf64d43 3008 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de
JMF
3009 """Run all the postprocessors on the given file."""
3010 info = dict(ie_info)
3011 info['filepath'] = filename
dcf64d43 3012 info['__files_to_move'] = files_to_move or {}
0202b52a 3013
56d868db 3014 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3015 info = self.run_pp(pp, info)
3016 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3017 del info['__files_to_move']
56d868db 3018 for pp in self._pps['after_move']:
dcf64d43 3019 info = self.run_pp(pp, info)
23c1a667 3020 return info
c1c9a79c 3021
5db07df6 3022 def _make_archive_id(self, info_dict):
e9fef7ee
S
3023 video_id = info_dict.get('id')
3024 if not video_id:
3025 return
5db07df6
PH
3026 # Future-proof against any change in case
3027 # and backwards compatibility with prior versions
e9fef7ee 3028 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3029 if extractor is None:
1211bb6d
S
3030 url = str_or_none(info_dict.get('url'))
3031 if not url:
3032 return
e9fef7ee 3033 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3034 for ie_key, ie in self._ies.items():
1211bb6d 3035 if ie.suitable(url):
8b7491c8 3036 extractor = ie_key
e9fef7ee
S
3037 break
3038 else:
3039 return
d0757229 3040 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3041
3042 def in_download_archive(self, info_dict):
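# Archive entries are single '<extractor> <video id>' lines built by _make_archive_id(),
# e.g. (hypothetical) 'youtube dQw4w9WgXcQ'; membership is checked against the in-memory
# set self.archive loaded at startup.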
3043 fn = self.params.get('download_archive')
3044 if fn is None:
3045 return False
3046
3047 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3048 if not vid_id:
7012b23c 3049 return False # Incomplete video information
5db07df6 3050
a45e8619 3051 return vid_id in self.archive
c1c9a79c
PH
3052
3053 def record_download_archive(self, info_dict):
3054 fn = self.params.get('download_archive')
3055 if fn is None:
3056 return
5db07df6
PH
3057 vid_id = self._make_archive_id(info_dict)
3058 assert vid_id
c1c9a79c 3059 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3060 archive_file.write(vid_id + '\n')
a45e8619 3061 self.archive.add(vid_id)
dd82ffea 3062
8c51aa65 3063 @staticmethod
8abeeb94 3064 def format_resolution(format, default='unknown'):
fb04e403 3065 if format.get('vcodec') == 'none':
8326b00a 3066 if format.get('acodec') == 'none':
3067 return 'images'
fb04e403 3068 return 'audio only'
f49d89ee
PH
3069 if format.get('resolution') is not None:
3070 return format['resolution']
35615307
DA
3071 if format.get('width') and format.get('height'):
3072 res = '%dx%d' % (format['width'], format['height'])
3073 elif format.get('height'):
3074 res = '%sp' % format['height']
3075 elif format.get('width'):
388ae76b 3076 res = '%dx?' % format['width']
8c51aa65 3077 else:
8abeeb94 3078 res = default
8c51aa65
JMF
3079 return res
3080
c57f7757
PH
3081 def _format_note(self, fdict):
3082 res = ''
3083 if fdict.get('ext') in ['f4f', 'f4m']:
3084 res += '(unsupported) '
32f90364
PH
3085 if fdict.get('language'):
3086 if res:
3087 res += ' '
9016d76f 3088 res += '[%s] ' % fdict['language']
c57f7757
PH
3089 if fdict.get('format_note') is not None:
3090 res += fdict['format_note'] + ' '
3091 if fdict.get('tbr') is not None:
3092 res += '%4dk ' % fdict['tbr']
3093 if fdict.get('container') is not None:
3094 if res:
3095 res += ', '
3096 res += '%s container' % fdict['container']
3089bc74
S
3097 if (fdict.get('vcodec') is not None
3098 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3099 if res:
3100 res += ', '
3101 res += fdict['vcodec']
91c7271a 3102 if fdict.get('vbr') is not None:
c57f7757
PH
3103 res += '@'
3104 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3105 res += 'video@'
3106 if fdict.get('vbr') is not None:
3107 res += '%4dk' % fdict['vbr']
fbb21cf5 3108 if fdict.get('fps') is not None:
5d583bdf
S
3109 if res:
3110 res += ', '
3111 res += '%sfps' % fdict['fps']
c57f7757
PH
3112 if fdict.get('acodec') is not None:
3113 if res:
3114 res += ', '
3115 if fdict['acodec'] == 'none':
3116 res += 'video only'
3117 else:
3118 res += '%-5s' % fdict['acodec']
3119 elif fdict.get('abr') is not None:
3120 if res:
3121 res += ', '
3122 res += 'audio'
3123 if fdict.get('abr') is not None:
3124 res += '@%3dk' % fdict['abr']
3125 if fdict.get('asr') is not None:
3126 res += ' (%5dHz)' % fdict['asr']
3127 if fdict.get('filesize') is not None:
3128 if res:
3129 res += ', '
3130 res += format_bytes(fdict['filesize'])
9732d77e
PH
3131 elif fdict.get('filesize_approx') is not None:
3132 if res:
3133 res += ', '
3134 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3135 return res
91c7271a 3136
c57f7757 3137 def list_formats(self, info_dict):
94badb25 3138 formats = info_dict.get('formats', [info_dict])
53ed7066 3139 new_format = (
3140 'list-formats' not in self.params.get('compat_opts', [])
169dbde9 3141 and self.params.get('listformats_table', True) is not False)
76d321f6 3142 if new_format:
3143 table = [
3144 [
3145 format_field(f, 'format_id'),
3146 format_field(f, 'ext'),
3147 self.format_resolution(f),
3148 format_field(f, 'fps', '%d'),
3149 '|',
3150 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3151 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3152 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
76d321f6 3153 '|',
3154 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3155 format_field(f, 'vbr', '%4dk'),
3156 format_field(f, 'acodec', default='unknown').replace('none', ''),
3157 format_field(f, 'abr', '%3dk'),
3158 format_field(f, 'asr', '%5dHz'),
3f698246 3159 ', '.join(filter(None, (
3160 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3161 format_field(f, 'language', '[%s]'),
3162 format_field(f, 'format_note'),
3163 format_field(f, 'container', ignore=(None, f.get('ext'))),
ea05b302 3164 ))),
3f698246 3165 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
76d321f6 3166 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3f698246 3167 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
76d321f6 3168 else:
3169 table = [
3170 [
3171 format_field(f, 'format_id'),
3172 format_field(f, 'ext'),
3173 self.format_resolution(f),
3174 self._format_note(f)]
3175 for f in formats
3176 if f.get('preference') is None or f['preference'] >= -1000]
3177 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3178
cfb56d1a 3179 self.to_screen(
169dbde9 3180 '[info] Available formats for %s:' % info_dict['id'])
3181 self.to_stdout(render_table(
bc97cdae 3182 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a
PH
3183
3184 def list_thumbnails(self, info_dict):
b0249bca 3185 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3186 if not thumbnails:
b7b72db9 3187 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3188 return
cfb56d1a
PH
3189
3190 self.to_screen(
3191 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3192 self.to_stdout(render_table(
cfb56d1a
PH
3193 ['ID', 'width', 'height', 'URL'],
3194 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3195
360e1ca5 3196 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3197 if not subtitles:
360e1ca5 3198 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3199 return
a504ced0 3200 self.to_screen(
edab9dbf 3201 'Available %s for %s:' % (name, video_id))
2412044c 3202
3203 def _row(lang, formats):
49c258e1 3204 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3205 if len(set(names)) == 1:
7aee40c1 3206 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3207 return [lang, ', '.join(names), ', '.join(exts)]
3208
169dbde9 3209 self.to_stdout(render_table(
2412044c 3210 ['Language', 'Name', 'Formats'],
3211 [_row(lang, formats) for lang, formats in subtitles.items()],
3212 hideEmpty=True))
a504ced0 3213
dca08720
PH
3214 def urlopen(self, req):
3215 """ Start an HTTP download """
82d8a8b6 3216 if isinstance(req, compat_basestring):
67dda517 3217 req = sanitized_Request(req)
19a41fc6 3218 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3219
3220 def print_debug_header(self):
3221 if not self.params.get('verbose'):
3222 return
62fec3b2 3223
c6afed48
PH
3224 stdout_encoding = getattr(
3225 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3226 encoding_str = (
734f90bb
PH
3227 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3228 locale.getpreferredencoding(),
3229 sys.getfilesystemencoding(),
c6afed48 3230 stdout_encoding,
b0472057 3231 self.get_encoding()))
4192b51c 3232 write_string(encoding_str, encoding=None)
734f90bb 3233
e5813e53 3234 source = (
3235 '(exe)' if hasattr(sys, 'frozen')
3236 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3237 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3238 else '')
3239 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
e0986e31 3240 if _LAZY_LOADER:
f74980cb 3241 self._write_string('[debug] Lazy loading extractors enabled\n')
3242 if _PLUGIN_CLASSES:
3243 self._write_string(
3244 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
53ed7066 3245 if self.params.get('compat_opts'):
3246 self._write_string(
3247 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720
PH
3248 try:
3249 sp = subprocess.Popen(
3250 ['git', 'rev-parse', '--short', 'HEAD'],
3251 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3252 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3253 out, err = process_communicate_or_kill(sp)
dca08720
PH
3254 out = out.decode().strip()
3255 if re.match('[0-9a-f]+', out):
f74980cb 3256 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3257 except Exception:
dca08720
PH
3258 try:
3259 sys.exc_clear()
70a1165b 3260 except Exception:
dca08720 3261 pass
b300cda4
S
3262
3263 def python_implementation():
3264 impl_name = platform.python_implementation()
3265 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3266 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3267 return impl_name
3268
e5813e53 3269 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3270 platform.python_version(),
3271 python_implementation(),
3272 platform.architecture()[0],
b300cda4 3273 platform_name()))
d28b5171 3274
73fac4e9 3275 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3276 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3277 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3278 exe_str = ', '.join(
2831b468 3279 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3280 ) or 'none'
d28b5171 3281 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720 3282
2831b468 3283 from .downloader.fragment import can_decrypt_frag
3284 from .downloader.websocket import has_websockets
3285 from .postprocessor.embedthumbnail import has_mutagen
3286 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3287
ad3dc496 3288 lib_str = ', '.join(sorted(filter(None, (
2831b468 3289 can_decrypt_frag and 'pycryptodome',
3290 has_websockets and 'websockets',
3291 has_mutagen and 'mutagen',
3292 SQLITE_AVAILABLE and 'sqlite',
3293 KEYRING_AVAILABLE and 'keyring',
ad3dc496 3294 )))) or 'none'
2831b468 3295 self._write_string('[debug] Optional libraries: %s\n' % lib_str)
3296
dca08720
PH
3297 proxy_map = {}
3298 for handler in self._opener.handlers:
3299 if hasattr(handler, 'proxies'):
3300 proxy_map.update(handler.proxies)
734f90bb 3301 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3302
58b1f00d
PH
3303 if self.params.get('call_home', False):
3304 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3305 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3306 return
58b1f00d
PH
3307 latest_version = self.urlopen(
3308 'https://yt-dl.org/latest/version').read().decode('utf-8')
3309 if version_tuple(latest_version) > version_tuple(__version__):
3310 self.report_warning(
3311 'You are using an outdated version (newest version: %s)! '
3312 'See https://yt-dl.org/update if you need help updating.' %
3313 latest_version)
3314
e344693b 3315 def _setup_opener(self):
6ad14cab 3316 timeout_val = self.params.get('socket_timeout')
19a41fc6 3317 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3318
982ee69a 3319 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3320 opts_cookiefile = self.params.get('cookiefile')
3321 opts_proxy = self.params.get('proxy')
3322
982ee69a 3323 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3324
6a3f4c3f 3325 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3326 if opts_proxy is not None:
3327 if opts_proxy == '':
3328 proxies = {}
3329 else:
3330 proxies = {'http': opts_proxy, 'https': opts_proxy}
3331 else:
3332 proxies = compat_urllib_request.getproxies()
067aa17e 3333 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3334 if 'http' in proxies and 'https' not in proxies:
3335 proxies['https'] = proxies['http']
91410c9b 3336 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3337
3338 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3339 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3340 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3341 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3342 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3343
3344 # When passing our own FileHandler instance, build_opener won't add the
3345 # default FileHandler and allows us to disable the file protocol, which
3346 # can be used for malicious purposes (see
067aa17e 3347 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3348 file_handler = compat_urllib_request.FileHandler()
3349
3350 def file_open(*args, **kwargs):
7a5c1cfe 3351 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3352 file_handler.file_open = file_open
3353
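# Handler order as passed to build_opener: per-request proxies, HTTPS, cookies, the custom
# YoutubeDLHandler, redirects, data: URLs and the neutered file: handler defined above.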
3354 opener = compat_urllib_request.build_opener(
fca6dba8 3355 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3356
dca08720
PH
3357 # Delete the default user-agent header, which would otherwise apply in
3358 # cases where our custom HTTP handler doesn't come into play
067aa17e 3359 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3360 opener.addheaders = []
3361 self._opener = opener
62fec3b2
PH
3362
3363 def encode(self, s):
3364 if isinstance(s, bytes):
3365 return s # Already encoded
3366
3367 try:
3368 return s.encode(self.get_encoding())
3369 except UnicodeEncodeError as err:
3370 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3371 raise
3372
3373 def get_encoding(self):
3374 encoding = self.params.get('encoding')
3375 if encoding is None:
3376 encoding = preferredencoding()
3377 return encoding
ec82d85a 3378
de6000d9 3379 def _write_thumbnails(self, info_dict, filename): # return the extensions
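# Walks the thumbnail list in reverse (best candidate first, presumably) and stops after
# the first successful write unless write_all_thumbnails is set; returns the extensions
# written, including the '<id>.' suffix when several thumbnails are kept.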
6c4fd172 3380 write_all = self.params.get('write_all_thumbnails', False)
3381 thumbnails = []
3382 if write_all or self.params.get('writethumbnail', False):
0202b52a 3383 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3384 multiple = write_all and len(thumbnails) > 1
ec82d85a 3385
0202b52a 3386 ret = []
981052c9 3387 for t in thumbnails[::-1]:
ec82d85a 3388 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3389 suffix = '%s.' % t['id'] if multiple else ''
3390 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3391 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3392
0c3d0f51 3393 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3394 ret.append(suffix + thumb_ext)
8ba87148 3395 t['filepath'] = thumb_filename
ec82d85a
PH
3396 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3397 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3398 else:
5ef7d9bd 3399 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3400 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3401 try:
3402 uf = self.urlopen(t['url'])
d3d89c32 3403 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3404 shutil.copyfileobj(uf, thumbf)
de6000d9 3405 ret.append(suffix + thumb_ext)
ec82d85a
PH
3406 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3407 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3408 t['filepath'] = thumb_filename
3158150c 3409 except network_exceptions as err:
ec82d85a 3410 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3411 (t['url'], error_to_compat_str(err)))
6c4fd172 3412 if ret and not write_all:
3413 break
0202b52a 3414 return ret