jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[aria2c] Fix --skip-unavailable-fragments
Commit  Line  Data
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
b5ae35ee 12import functools
8222d8de 13import io
b82f815f 14import itertools
8694c600 15import json
62fec3b2 16import locale
083c9df9 17import operator
8222d8de 18import os
dca08720 19import platform
8222d8de
JMF
20import re
21import shutil
dca08720 22import subprocess
8222d8de 23import sys
21cd8fae 24import tempfile
8222d8de 25import time
67134eab 26import tokenize
8222d8de 27import traceback
75822ca7 28import random
524e2e4f 29import unicodedata
8222d8de 30
961ea474
S
31from string import ascii_letters
32
8c25f81b 33from .compat import (
82d8a8b6 34 compat_basestring,
003c69a8 35 compat_get_terminal_size,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
edf65256 39 compat_pycrypto_AES,
7d1eb38a 40 compat_shlex_quote,
ce02ed60 41 compat_str,
67134eab 42 compat_tokenize_tokenize,
ce02ed60
PH
43 compat_urllib_error,
44 compat_urllib_request,
8b172c2e 45 compat_urllib_request_DataHandler,
819e0531 46 windows_enable_vt_mode,
8c25f81b 47)
982ee69a 48from .cookies import load_cookies
8c25f81b 49from .utils import (
eedb7ba5
S
50 age_restricted,
51 args_to_str,
ce02ed60
PH
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
acd69589 55 DEFAULT_OUTTMPL,
ce02ed60 56 determine_ext,
b5559424 57 determine_protocol,
732044af 58 DOT_DESKTOP_LINK_TEMPLATE,
59 DOT_URL_LINK_TEMPLATE,
60 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 61 DownloadError,
c0384f22 62 encode_compat_str,
ce02ed60 63 encodeFilename,
498f5606 64 EntryNotInPlaylist,
a06916d9 65 error_to_compat_str,
8b0d7497 66 ExistingVideoReached,
590bc6f6 67 expand_path,
ce02ed60 68 ExtractorError,
e29663c6 69 float_or_none,
02dbf93f 70 format_bytes,
76d321f6 71 format_field,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
b0249bca 74 HEADRequest,
c9969434 75 int_or_none,
732044af 76 iri_to_uri,
773f291d 77 ISO3166Utils,
56a8fb4f 78 LazyList,
ce02ed60 79 locked_file,
0202b52a 80 make_dir,
dca08720 81 make_HTTPS_handler,
ce02ed60 82 MaxDownloadsReached,
3158150c 83 network_exceptions,
cd6fc19e 84 orderedSet,
a06916d9 85 OUTTMPL_TYPES,
b7ab0590 86 PagedList,
083c9df9 87 parse_filesize,
91410c9b 88 PerRequestProxyHandler,
dca08720 89 platform_name,
eedb7ba5 90 PostProcessingError,
ce02ed60 91 preferredencoding,
eedb7ba5 92 prepend_extension,
a06916d9 93 process_communicate_or_kill,
51fb4995 94 register_socks_protocols,
a06916d9 95 RejectedVideoReached,
cfb56d1a 96 render_table,
eedb7ba5 97 replace_extension,
ce02ed60
PH
98 SameFileError,
99 sanitize_filename,
1bb5c511 100 sanitize_path,
dcf77cf1 101 sanitize_url,
67dda517 102 sanitized_Request,
e5660ee6 103 std_headers,
819e0531 104 STR_FORMAT_RE_TMPL,
105 STR_FORMAT_TYPES,
1211bb6d 106 str_or_none,
e29663c6 107 strftime_or_none,
ce02ed60 108 subtitles_filename,
819e0531 109 supports_terminal_sequences,
110 TERMINAL_SEQUENCES,
51d9739f 111 ThrottledDownload,
732044af 112 to_high_limit_path,
324ad820 113 traverse_obj,
6033d980 114 try_get,
ce02ed60 115 UnavailableVideoError,
29eb5174 116 url_basename,
7d1eb38a 117 variadic,
58b1f00d 118 version_tuple,
ce02ed60
PH
119 write_json_file,
120 write_string,
6a3f4c3f 121 YoutubeDLCookieProcessor,
dca08720 122 YoutubeDLHandler,
fca6dba8 123 YoutubeDLRedirectHandler,
ce02ed60 124)
a0e07d31 125from .cache import Cache
52a8a1e1 126from .extractor import (
127 gen_extractor_classes,
128 get_info_extractor,
129 _LAZY_LOADER,
3ae5e797 130 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 131)
4c54b89e 132from .extractor.openload import PhantomJSwrapper
52a8a1e1 133from .downloader import (
dbf5416a 134 FFmpegFD,
52a8a1e1 135 get_suitable_downloader,
136 shorten_protocol_name
137)
4c83c967 138from .downloader.rtmp import rtmpdump_version
4f026faf 139from .postprocessor import (
e36d50c5 140 get_postprocessor,
4e3b637d 141 EmbedThumbnailPP,
e36d50c5 142 FFmpegFixupDurationPP,
f17f8651 143 FFmpegFixupM3u8PP,
62cd676c 144 FFmpegFixupM4aPP,
6271f1ca 145 FFmpegFixupStretchedPP,
e36d50c5 146 FFmpegFixupTimestampPP,
4f026faf
PH
147 FFmpegMergerPP,
148 FFmpegPostProcessor,
0202b52a 149 MoveFilesAfterDownloadPP,
3ae5e797 150 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 151)
4c88ff87 152from .update import detect_variant
dca08720 153from .version import __version__
8222d8de 154
e9c0cdd3
YCH
155if compat_os_name == 'nt':
156 import ctypes
157
2459b6e1 158
8222d8de
JMF
159class YoutubeDL(object):
160 """YoutubeDL class.
161
162    YoutubeDL objects are the ones responsible for downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among some other tasks. In most cases there should be one per
165    program. Since, given a video URL, the downloader doesn't know how to
166    extract all the needed information (a task that InfoExtractors do), it
167    has to pass the URL to one of them.
168
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171    a URL, the YoutubeDL object hands it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174    YoutubeDL processes the extracted information, possibly using a File
175 Downloader to download the video.
176
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181    registers itself as the downloader in charge of the InfoExtractors
182 that are added to it, so this is a "mutual registration".
183
184 Available options:
185
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
180940e0 188 videopassword: Password for accessing a video.
1da50aa3
S
189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
8222d8de
JMF
192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
ad8915b7 195 no_warnings: Do not print out anything for warnings.
53c18592 196 forceprint: A list of templates to force print
197 forceurl: Force printing final URL. (Deprecated)
198 forcetitle: Force printing title. (Deprecated)
199 forceid: Force printing ID. (Deprecated)
200 forcethumbnail: Force printing thumbnail URL. (Deprecated)
201 forcedescription: Force printing description. (Deprecated)
202 forcefilename: Force printing final filename. (Deprecated)
203 forceduration: Force printing duration. (Deprecated)
8694c600 204 forcejson: Force printing info_dict as JSON.
63e0be34
PH
205 dump_single_json: Force printing the info_dict of the whole playlist
206 (or video) as a single JSON line.
c25228e5 207 force_write_download_archive: Force writing download archive regardless
208 of 'skip_download' or 'simulate'.
b7b04c78 209 simulate: Do not download the video files. If unset (or None),
210 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 211 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 213    ignore_no_formats_error: Ignore "No video formats" error. Useful for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
c25228e5 216 format_sort: How to sort the video formats. see "Sorting Formats"
217 for more details.
218 format_sort_force: Force the given format_sort. see "Sorting Formats"
219 for more details.
220 allow_multiple_video_streams: Allow multiple video streams to be merged
221 into a single file
222 allow_multiple_audio_streams: Allow multiple audio streams to be merged
223 into a single file
0ba692ac 224    check_formats:     Whether to test if the formats are downloadable.
225 Can be True (check all), False (check none)
226 or None (check only if requested by extractor)
4524baf0 227    paths:             Dictionary of output paths. The allowed keys are 'home',
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 229 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 231 For compatibility with youtube-dl, a single string can also be used
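                       A minimal sketch of the two options together (the values are
                       only illustrative, not defaults):
                           'paths': {'home': '~/Videos', 'temp': '.tmp'},
                           'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s',
                                       'thumbnail': '%(title)s.%(ext)s'}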
a820dc72
RA
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
4524baf0 235 windowsfilenames: Force the filenames to be windows compatible
b1940459 236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
26e2805c 239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
d22dec74 241 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
34488702 245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
8222d8de
JMF
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
c14e88f0 249 playlist_items: Specific indices of playlist to download.
ff815fe6 250 playlistreverse: Download playlist items in reverse order.
75822ca7 251 playlistrandom: Download playlist items in random order.
8222d8de
JMF
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
8bf9319e 254 logger: Log messages to a logging.Logger instance.
8222d8de 255 logtostderr: Log messages to stderr instead of stdout.
819e0531 256 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video description to a .info.json file
75d43ca0 259 clean_infojson: Remove private fields from the infojson
34488702 260 getcomments: Extract video comments. This will not be written to disk
06167fbb 261 unless writeinfojson is also given
1fb07d10 262 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 263 writethumbnail: Write the thumbnail image to a file
c25228e5 264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
ec82d85a 266 write_all_thumbnails: Write all thumbnail formats to files
732044af 267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 272 writesubtitles: Write the video subtitles to a file
741dd8ea 273 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 274 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 275 Downloads all the subtitles of the video
0b7f3118 276 (requires writesubtitles or writeautomaticsub)
8222d8de 277 listsubtitles: Lists all available subtitles for the video
a504ced0 278 subtitlesformat: The format code for subtitles
c32b0aab 279 subtitleslangs: List of languages of the subtitles to download (can be regex).
280 The list may contain "all" to refer to all the available
281 subtitles. The language can be prefixed with a "-" to
282 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
283 keepvideo: Keep the video file after post-processing
284 daterange: A DateRange object, download only if the upload_date is in the range.
285 skip_download: Skip the actual download of the video file
c35f9e72 286 cachedir: Location of the cache files in the filesystem.
a0e07d31 287 False to disable filesystem cache.
47192f92 288 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
289 age_limit: An integer representing the user's age in years.
290    Videos unsuitable for the given age are skipped.
5fe18bdb
PH
291 min_views: An integer representing the minimum view count the video
292 must have in order to not be skipped.
293 Videos without view count information are always
294 downloaded. None for no limit.
295 max_views: An integer representing the maximum view count.
296 Videos that are more popular than that are not
297 downloaded.
298 Videos without view count information are always
299 downloaded. None for no limit.
300 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
301 Videos already present in the file are not downloaded
302 again.
8a51f564 303 break_on_existing: Stop the download process after attempting to download a
304 file that is in the archive.
305 break_on_reject: Stop the download process when encountering a video that
306 has been filtered out.
307 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
308 cookiesfrombrowser: A tuple containing the name of the browser and the profile
309 name/path from where cookies are loaded.
310    Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 311 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
312 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
313 At the moment, this is only supported by YouTube.
a1ee09e8 314 proxy: URL of the proxy server to use
38cce791 315 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 316 on geo-restricted sites.
e344693b 317 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
318 bidi_workaround: Work around buggy terminals without bidirectional text
319    support, using fribidi
a0ddb8a2 320 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 321 include_ads: Download ads as well
04b4d394
PH
322 default_search: Prepend this string if an input url is not valid.
323 'auto' for elaborate guessing
62fec3b2 324 encoding: Use this encoding instead of the system-specified.
e8ee972c 325 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
326 Pass in 'in_playlist' to only show this behavior for
327 playlist items.
4f026faf 328 postprocessors: A list of dictionaries, each with an entry
71b640cc 329 * key: The name of the postprocessor. See
7a5c1cfe 330 yt_dlp/postprocessor/__init__.py for a list.
56d868db 331 * when: When to run the postprocessor. Can be one of
332 pre_process|before_dl|post_process|after_move.
333 Assumed to be 'post_process' if not given
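                       Eg (a minimal sketch; the options other than 'key' and 'when'
                       are only illustrative):
                           'postprocessors': [{
                               'key': 'FFmpegExtractAudio',
                               'preferredcodec': 'mp3',
                               'when': 'post_process',
                           }]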
b5ae35ee 334 post_hooks: Deprecated - Register a custom postprocessor instead
335 A list of functions that get called as the final step
ab8e5e51
AM
336 for each video file, after all postprocessors have been
337 called. The filename will be passed as the only argument.
71b640cc
PH
338 progress_hooks: A list of functions that get called on download
339 progress, with a dictionary with the entries
5cda4eda 340 * status: One of "downloading", "error", or "finished".
ee69b99a 341 Check this first and ignore unknown values.
3ba7740d 342 * info_dict: The extracted info_dict
71b640cc 343
5cda4eda 344    If status is one of "downloading" or "finished", the
ee69b99a
PH
345 following properties may also be present:
346 * filename: The final filename (always present)
5cda4eda 347 * tmpfilename: The filename we're currently writing to
71b640cc
PH
348 * downloaded_bytes: Bytes on disk
349 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
350 * total_bytes_estimate: Guess of the eventual file size,
351 None if unavailable.
352 * elapsed: The number of seconds since download started.
71b640cc
PH
353 * eta: The estimated time in seconds, None if unknown
354 * speed: The download speed in bytes/second, None if
355 unknown
5cda4eda
PH
356 * fragment_index: The counter of the currently
357 downloaded video fragment.
358 * fragment_count: The number of fragments (= individual
359 files that will be merged)
71b640cc
PH
360
361 Progress hooks are guaranteed to be called at least once
362 (with status "finished") if the download is successful.
819e0531 363 postprocessor_hooks: A list of functions that get called on postprocessing
364 progress, with a dictionary with the entries
365 * status: One of "started", "processing", or "finished".
366 Check this first and ignore unknown values.
367 * postprocessor: Name of the postprocessor
368 * info_dict: The extracted info_dict
369
370 Progress hooks are guaranteed to be called at least twice
371 (with status "started" and "finished") if the processing is successful.
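                       A minimal sketch of a download progress hook (which fields
                       are present depends on the status, as described above):

                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Finished downloading %s' % d.get('filename'))

                           'progress_hooks': [my_hook]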
45598f15 372 merge_output_format: Extension to use when merging formats.
6b591b29 373 final_ext: Expected final extension; used to detect when the file was
374 already downloaded and converted. "merge_output_format" is
375 replaced by this extension when given
6271f1ca
PH
376 fixup: Automatically correct known faults of the file.
377 One of:
378 - "never": do nothing
379 - "warn": only emit a warning
380 - "detect_or_warn": check whether we can do anything
62cd676c 381 about it, warn otherwise (default)
504f20dd 382 source_address: Client-side IP address to bind to.
6ec6cb4e 383 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 384 yt-dlp servers for debugging. (BROKEN)
1cf376f5 385 sleep_interval_requests: Number of seconds to sleep between requests
386 during extraction
7aa589a5
S
387 sleep_interval: Number of seconds to sleep before each download when
388 used alone or a lower bound of a range for randomized
389 sleep before each download (minimum possible number
390 of seconds to sleep) when used along with
391 max_sleep_interval.
392 max_sleep_interval:Upper bound of a range for randomized sleep before each
393 download (maximum possible number of seconds to sleep).
394 Must only be used along with sleep_interval.
395 Actual sleep time will be a random float from range
396 [sleep_interval; max_sleep_interval].
1cf376f5 397 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
398 listformats: Print an overview of available video formats and exit.
399 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
400 match_filter: A function that gets called with the info_dict of
401 every video.
402 If it returns a message, the video is ignored.
403 If it returns None, the video is downloaded.
404    match_filter_func in utils.py is one example of this.
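                       A minimal sketch (the 60-second cutoff is hypothetical);
                       returning a string skips the video with that message,
                       returning None lets it download:

                           def longer_than_a_minute(info_dict, incomplete=False):
                               duration = info_dict.get('duration')
                               if duration and duration < 60:
                                   return 'The video is too short'

                           'match_filter': longer_than_a_minute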
7e5db8c9 405 no_color: Do not emit color codes in output.
0a840f58 406 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 407 HTTP header
0a840f58 408 geo_bypass_country:
773f291d
S
409 Two-letter ISO 3166-2 country code that will be used for
410 explicit geographic restriction bypassing via faking
504f20dd 411 X-Forwarded-For HTTP header
5f95927a
S
412 geo_bypass_ip_block:
413 IP range in CIDR notation that will be used similarly to
504f20dd 414 geo_bypass_country
71b640cc 415
85729c51 416 The following options determine which downloader is picked:
52a8a1e1 417 external_downloader: A dictionary of protocol keys and the executable of the
418 external downloader to use for it. The allowed protocols
419 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
420 Set the value to 'native' to use the native downloader
421 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
422 or {'m3u8': 'ffmpeg'} instead.
423 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
424    if True; use ffmpeg/avconv if False; and use the downloader
425    suggested by the extractor if None.
53ed7066 426 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 427 The following options do not work when used through the API:
b5ae35ee 428 filename, abort-on-error, multistreams, no-live-chat, format-sort
b51d2ae3 429 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
e4f02757 430    Refer to __init__.py for their implementation
819e0531 431 progress_template: Dictionary of templates for progress outputs.
432 Allowed keys are 'download', 'postprocess',
433 'download-title' (console title) and 'postprocess-title'.
434 The template is mapped on a dictionary with keys 'progress' and 'info'
fe7e0c98 435
8222d8de 436    The following parameters are not used by YoutubeDL itself; they are used by
7a5c1cfe 437 the downloader (see yt_dlp/downloader/common.py):
51d9739f 438 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
b5ae35ee 439 max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
440 noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
441 external_downloader_args.
76b1bd67
JMF
442
443 The following options are used by the post processors:
d4a24f40 444 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 445 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
446 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
447 to the binary or its containing directory.
43820c03 448 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 449 and a list of additional command-line arguments for the
450 postprocessor/executable. The dict can also have "PP+EXE" keys
451 which are used when the given exe is used by the given PP.
452    Use 'default' as the name for arguments to be passed to all PP
453 For compatibility with youtube-dl, a single list of args
454 can also be used
e409895f 455
456 The following options are used by the extractors:
62bff2c1 457 extractor_retries: Number of times to retry for known errors
458 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 459 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 460 discontinuities such as ad breaks (default: False)
5d3a0e79 461 extractor_args: A dictionary of arguments to be passed to the extractors.
462 See "EXTRACTOR ARGUMENTS" for details.
463 Eg: {'youtube': {'skip': ['dash', 'hls']}}
464 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
465 If True (default), DASH manifests and related
62bff2c1 466 data will be downloaded and processed by extractor.
467 You can reduce network I/O by disabling it if you don't
468 care about DASH. (only for youtube)
5d3a0e79 469 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
470 If True (default), HLS manifests and related
62bff2c1 471 data will be downloaded and processed by extractor.
472 You can reduce network I/O by disabling it if you don't
473 care about HLS. (only for youtube)
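
    A typical embedding sketch (the URL and options are only examples):

        import yt_dlp

        ydl_opts = {'format': 'bestvideo+bestaudio/best'}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])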
8222d8de
JMF
474 """
475
c9969434
S
476 _NUMERIC_FIELDS = set((
477 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 478 'timestamp', 'release_timestamp',
c9969434
S
479 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
480 'average_rating', 'comment_count', 'age_limit',
481 'start_time', 'end_time',
482 'chapter_number', 'season_number', 'episode_number',
483 'track_number', 'disc_number', 'release_year',
c9969434
S
484 ))
485
8222d8de 486 params = None
8b7491c8 487 _ies = {}
56d868db 488 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 489 _printed_messages = set()
1cf376f5 490 _first_webpage_request = True
8222d8de
JMF
491 _download_retcode = None
492 _num_downloads = None
30a074c2 493 _playlist_level = 0
494 _playlist_urls = set()
8222d8de
JMF
495 _screen_file = None
496
3511266b 497 def __init__(self, params=None, auto_init=True):
8222d8de 498 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
499 if params is None:
500 params = {}
8b7491c8 501 self._ies = {}
56c73665 502 self._ies_instances = {}
56d868db 503 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 504 self._printed_messages = set()
1cf376f5 505 self._first_webpage_request = True
ab8e5e51 506 self._post_hooks = []
933605d7 507 self._progress_hooks = []
819e0531 508 self._postprocessor_hooks = []
8222d8de
JMF
509 self._download_retcode = 0
510 self._num_downloads = 0
511 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 512 self._err_file = sys.stderr
819e0531 513 self.params = params
a0e07d31 514 self.cache = Cache(self)
34308b30 515
819e0531 516 windows_enable_vt_mode()
d1d5c08f 517 # FIXME: This will break if we ever print color to stdout
819e0531 518 self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)
519
a61f4b28 520 if sys.version_info < (3, 6):
521 self.report_warning(
0181adef 522 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 523
88acdbc2 524 if self.params.get('allow_unplayable_formats'):
525 self.report_warning(
819e0531 526 f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
527 'This is a developer option intended for debugging. \n'
528 ' If you experience any issues while using this option, '
529 f'{self._color_text("DO NOT", "red")} open a bug report')
88acdbc2 530
be5df5ee
S
531 def check_deprecated(param, option, suggestion):
532 if self.params.get(param) is not None:
53ed7066 533 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee
S
534 return True
535 return False
536
537 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
538 if self.params.get('geo_verification_proxy') is None:
539 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
540
0d1bb027 541 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
542 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 543 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 544
545 for msg in self.params.get('warnings', []):
546 self.report_warning(msg)
547
b5ae35ee 548 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 549 # nooverwrites was unnecessarily changed to overwrites
550 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
551 # This ensures compatibility with both keys
552 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 553 elif self.params.get('overwrites') is None:
554 self.params.pop('overwrites', None)
b868936c 555 else:
556 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 557
0783b09b 558 if params.get('bidi_workaround', False):
1c088fa8
PH
559 try:
560 import pty
561 master, slave = pty.openpty()
003c69a8 562 width = compat_get_terminal_size().columns
1c088fa8
PH
563 if width is None:
564 width_args = []
565 else:
566 width_args = ['-w', str(width)]
5d681e96 567 sp_kwargs = dict(
1c088fa8
PH
568 stdin=subprocess.PIPE,
569 stdout=slave,
570 stderr=self._err_file)
5d681e96
PH
571 try:
572 self._output_process = subprocess.Popen(
573 ['bidiv'] + width_args, **sp_kwargs
574 )
575 except OSError:
5d681e96
PH
576 self._output_process = subprocess.Popen(
577 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
578 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 579 except OSError as ose:
66e7ace1 580 if ose.errno == errno.ENOENT:
6febd1c1 581 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
582 else:
583 raise
0783b09b 584
3089bc74
S
585 if (sys.platform != 'win32'
586 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
587 and not params.get('restrictfilenames', False)):
e9137224 588 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 589 self.report_warning(
6febd1c1 590 'Assuming --restrict-filenames since file system encoding '
1b725173 591 'cannot encode all characters. '
6febd1c1 592 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 593 self.params['restrictfilenames'] = True
34308b30 594
de6000d9 595 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 596
187986a8 597 # Creating format selector here allows us to catch syntax errors before the extraction
598 self.format_selector = (
599 None if self.params.get('format') is None
600 else self.build_format_selector(self.params['format']))
601
dca08720
PH
602 self._setup_opener()
603
3511266b
PH
604 if auto_init:
605 self.print_debug_header()
606 self.add_default_info_extractors()
607
4f026faf 608 for pp_def_raw in self.params.get('postprocessors', []):
4f026faf 609 pp_def = dict(pp_def_raw)
fd7cfb64 610 when = pp_def.pop('when', 'post_process')
611 pp_class = get_postprocessor(pp_def.pop('key'))
4f026faf 612 pp = pp_class(self, **compat_kwargs(pp_def))
5bfa4862 613 self.add_post_processor(pp, when=when)
4f026faf 614
ab8e5e51
AM
615 for ph in self.params.get('post_hooks', []):
616 self.add_post_hook(ph)
617
71b640cc
PH
618 for ph in self.params.get('progress_hooks', []):
619 self.add_progress_hook(ph)
620
51fb4995
YCH
621 register_socks_protocols()
622
ed39cac5 623 def preload_download_archive(fn):
624 """Preload the archive, if any is specified"""
625 if fn is None:
626 return False
627 self.write_debug('Loading archive file %r\n' % fn)
628 try:
629 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
630 for line in archive_file:
631 self.archive.add(line.strip())
632 except IOError as ioe:
633 if ioe.errno != errno.ENOENT:
634 raise
635 return False
636 return True
637
638 self.archive = set()
639 preload_download_archive(self.params.get('download_archive'))
640
7d4111ed
PH
641 def warn_if_short_id(self, argv):
642 # short YouTube ID starting with dash?
643 idxs = [
644 i for i, a in enumerate(argv)
645 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
646 if idxs:
647 correct_argv = (
7a5c1cfe 648 ['yt-dlp']
3089bc74
S
649 + [a for i, a in enumerate(argv) if i not in idxs]
650 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
651 )
652 self.report_warning(
653 'Long argument string detected. '
654 'Use -- to separate parameters and URLs, like this:\n%s\n' %
655 args_to_str(correct_argv))
656
8222d8de
JMF
657 def add_info_extractor(self, ie):
658 """Add an InfoExtractor object to the end of the list."""
8b7491c8 659 ie_key = ie.ie_key()
660 self._ies[ie_key] = ie
e52d7f85 661 if not isinstance(ie, type):
8b7491c8 662 self._ies_instances[ie_key] = ie
e52d7f85 663 ie.set_downloader(self)
8222d8de 664
8b7491c8 665 def _get_info_extractor_class(self, ie_key):
666 ie = self._ies.get(ie_key)
667 if ie is None:
668 ie = get_info_extractor(ie_key)
669 self.add_info_extractor(ie)
670 return ie
671
56c73665
JMF
672 def get_info_extractor(self, ie_key):
673 """
674    Get an instance of an IE with name ie_key; it will try to get one from
675    the _ies list and, if there's no instance, it will create a new one and add
676 it to the extractor list.
677 """
678 ie = self._ies_instances.get(ie_key)
679 if ie is None:
680 ie = get_info_extractor(ie_key)()
681 self.add_info_extractor(ie)
682 return ie
683
023fa8c4
JMF
684 def add_default_info_extractors(self):
685 """
686 Add the InfoExtractors returned by gen_extractors to the end of the list
687 """
e52d7f85 688 for ie in gen_extractor_classes():
023fa8c4
JMF
689 self.add_info_extractor(ie)
690
56d868db 691 def add_post_processor(self, pp, when='post_process'):
8222d8de 692 """Add a PostProcessor object to the end of the chain."""
5bfa4862 693 self._pps[when].append(pp)
8222d8de
JMF
694 pp.set_downloader(self)
695
ab8e5e51
AM
696 def add_post_hook(self, ph):
697 """Add the post hook"""
698 self._post_hooks.append(ph)
699
933605d7 700 def add_progress_hook(self, ph):
819e0531 701 """Add the download progress hook"""
933605d7 702 self._progress_hooks.append(ph)
8ab470f1 703
819e0531 704 def add_postprocessor_hook(self, ph):
705 """Add the postprocessing progress hook"""
706 self._postprocessor_hooks.append(ph)
707
1c088fa8 708 def _bidi_workaround(self, message):
5d681e96 709 if not hasattr(self, '_output_channel'):
1c088fa8
PH
710 return message
711
5d681e96 712 assert hasattr(self, '_output_process')
11b85ce6 713 assert isinstance(message, compat_str)
6febd1c1
PH
714 line_count = message.count('\n') + 1
715 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 716 self._output_process.stdin.flush()
6febd1c1 717 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 718 for _ in range(line_count))
6febd1c1 719 return res[:-len('\n')]
1c088fa8 720
b35496d8 721 def _write_string(self, message, out=None, only_once=False):
722 if only_once:
723 if message in self._printed_messages:
724 return
725 self._printed_messages.add(message)
726 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 727
848887eb 728 def to_stdout(self, message, skip_eol=False, quiet=False):
0760b0a7 729 """Print message to stdout"""
8bf9319e 730 if self.params.get('logger'):
43afe285 731 self.params['logger'].debug(message)
835a1478 732 elif not quiet or self.params.get('verbose'):
733 self._write_string(
734 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
735 self._err_file if quiet else self._screen_file)
8222d8de 736
b35496d8 737 def to_stderr(self, message, only_once=False):
0760b0a7 738 """Print message to stderr"""
11b85ce6 739 assert isinstance(message, compat_str)
8bf9319e 740 if self.params.get('logger'):
43afe285
IB
741 self.params['logger'].error(message)
742 else:
b35496d8 743 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
8222d8de 744
1e5b9a95
PH
745 def to_console_title(self, message):
746 if not self.params.get('consoletitle', False):
747 return
4bede0d8
C
748 if compat_os_name == 'nt':
749 if ctypes.windll.kernel32.GetConsoleWindow():
750 # c_wchar_p() might not be necessary if `message` is
751 # already of type unicode()
752 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 753 elif 'TERM' in os.environ:
b46696bd 754 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 755
bdde425c
PH
756 def save_console_title(self):
757 if not self.params.get('consoletitle', False):
758 return
b7b04c78 759 if self.params.get('simulate'):
94c3442e 760 return
4bede0d8 761 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 762 # Save the title on stack
734f90bb 763 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
764
765 def restore_console_title(self):
766 if not self.params.get('consoletitle', False):
767 return
b7b04c78 768 if self.params.get('simulate'):
94c3442e 769 return
4bede0d8 770 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 771 # Restore the title from stack
734f90bb 772 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
773
774 def __enter__(self):
775 self.save_console_title()
776 return self
777
778 def __exit__(self, *args):
779 self.restore_console_title()
f89197d7 780
dca08720 781 if self.params.get('cookiefile') is not None:
1bab3437 782 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 783
8222d8de
JMF
784 def trouble(self, message=None, tb=None):
785 """Determine action to take when a download problem appears.
786
787    Depending on whether the downloader has been configured to ignore
788 download errors or not, this method may throw an exception or
789 not when errors are found, after printing the message.
790
791 tb, if given, is additional traceback information.
792 """
793 if message is not None:
794 self.to_stderr(message)
795 if self.params.get('verbose'):
796 if tb is None:
797 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 798 tb = ''
8222d8de 799 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 800 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 801 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
802 else:
803 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 804 tb = ''.join(tb_data)
c19bc311 805 if tb:
806 self.to_stderr(tb)
b1940459 807 if not self.params.get('ignoreerrors'):
8222d8de
JMF
808 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
809 exc_info = sys.exc_info()[1].exc_info
810 else:
811 exc_info = sys.exc_info()
812 raise DownloadError(message, exc_info)
813 self._download_retcode = 1
814
0760b0a7 815 def to_screen(self, message, skip_eol=False):
816 """Print message to stdout if not in quiet mode"""
817 self.to_stdout(
818 message, skip_eol, quiet=self.params.get('quiet', False))
819
819e0531 820 def _color_text(self, text, color):
821 if self.params.get('no_color'):
822 return text
823 return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
824
c84aeac6 825 def report_warning(self, message, only_once=False):
8222d8de
JMF
826 '''
827    Print the message to stderr; it will be prefixed with 'WARNING:'.
828    If stderr is a tty file, the 'WARNING:' will be colored.
829 '''
6d07ce01
JMF
830 if self.params.get('logger') is not None:
831 self.params['logger'].warning(message)
8222d8de 832 else:
ad8915b7
PH
833 if self.params.get('no_warnings'):
834 return
819e0531 835 self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
8222d8de
JMF
836
837 def report_error(self, message, tb=None):
838 '''
839 Do the same as trouble, but prefixes the message with 'ERROR:', colored
840 in red if stderr is a tty file.
841 '''
819e0531 842 self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
8222d8de 843
b35496d8 844 def write_debug(self, message, only_once=False):
0760b0a7 845    '''Log debug message or print message to stderr'''
846 if not self.params.get('verbose', False):
847 return
848 message = '[debug] %s' % message
849 if self.params.get('logger'):
850 self.params['logger'].debug(message)
851 else:
b35496d8 852 self.to_stderr(message, only_once)
0760b0a7 853
8222d8de
JMF
854 def report_file_already_downloaded(self, file_name):
855 """Report file has already been fully downloaded."""
856 try:
6febd1c1 857 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 858 except UnicodeEncodeError:
6febd1c1 859 self.to_screen('[download] The file has already been downloaded')
8222d8de 860
0c3d0f51 861 def report_file_delete(self, file_name):
862 """Report that existing file will be deleted."""
863 try:
c25228e5 864 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 865 except UnicodeEncodeError:
c25228e5 866 self.to_screen('Deleting existing file')
0c3d0f51 867
1151c407 868 def raise_no_formats(self, info, forced=False):
869 has_drm = info.get('__has_drm')
88acdbc2 870 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
871 expected = self.params.get('ignore_no_formats_error')
872 if forced or not expected:
1151c407 873 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
874 expected=has_drm or expected)
88acdbc2 875 else:
876 self.report_warning(msg)
877
de6000d9 878 def parse_outtmpl(self):
879 outtmpl_dict = self.params.get('outtmpl', {})
880 if not isinstance(outtmpl_dict, dict):
881 outtmpl_dict = {'default': outtmpl_dict}
882 outtmpl_dict.update({
883 k: v for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 884 if outtmpl_dict.get(k) is None})
de6000d9 885 for key, val in outtmpl_dict.items():
886 if isinstance(val, bytes):
887 self.report_warning(
888 'Parameter outtmpl is bytes, but should be a unicode string. '
889 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
890 return outtmpl_dict
891
21cd8fae 892 def get_output_path(self, dir_type='', filename=None):
893 paths = self.params.get('paths', {})
894 assert isinstance(paths, dict)
895 path = os.path.join(
896 expand_path(paths.get('home', '').strip()),
897 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
898 filename or '')
899
900 # Temporary fix for #4787
901 # 'Treat' all problem characters by passing filename through preferredencoding
902    # to work around encoding issues with subprocess on python2 @ Windows
903 if sys.version_info < (3, 0) and sys.platform == 'win32':
904 path = encodeFilename(path, True).decode(preferredencoding())
905 return sanitize_path(path, force=self.params.get('windowsfilenames'))
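        # Illustrative only (hypothetical params): with
        # paths = {'home': '~/Videos', 'temp': 'parts'}, a call like
        # get_output_path('temp', 'clip.f137.part') expands to something like
        # '/home/user/Videos/parts/clip.f137.part' (via expand_path/sanitize_path).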
906
76a264ac 907 @staticmethod
901130bb 908 def _outtmpl_expandpath(outtmpl):
909 # expand_path translates '%%' into '%' and '$$' into '$'
910    # correspondingly; that is not what we want, since we need to keep
911    # '%%' intact for the template dict substitution step. We work around this
912    # with a boundary-like separator hack.
913 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
914 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
915
916 # outtmpl should be expand_path'ed before template dict substitution
917 # because meta fields may contain env variables we don't want to
918 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
919 # title "Hello $PATH", we don't want `$PATH` to be expanded.
920 return expand_path(outtmpl).replace(sep, '')
921
922 @staticmethod
923 def escape_outtmpl(outtmpl):
924 ''' Escape any remaining strings like %s, %abc% etc. '''
925 return re.sub(
926 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
927 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
928 outtmpl)
929
930 @classmethod
931 def validate_outtmpl(cls, outtmpl):
76a264ac 932 ''' @return None or Exception object '''
7d1eb38a 933 outtmpl = re.sub(
524e2e4f 934 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
7d1eb38a 935 lambda mobj: f'{mobj.group(0)[:-1]}s',
936 cls._outtmpl_expandpath(outtmpl))
76a264ac 937 try:
7d1eb38a 938 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 939 return None
940 except ValueError as err:
941 return err
942
143db31d 943 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
819e0531 944 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
6e84b215 945 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 946
6e84b215 947 info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList
948 for key in ('__original_infodict', '__postprocessors'):
949 info_dict.pop(key, None)
752cda38 950 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 951 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 952 if info_dict.get('duration', None) is not None
953 else None)
752cda38 954 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
955 if info_dict.get('resolution') is None:
956 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 957
e6f21b3d 958 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 959 # of %(field)s to %(field)0Nd for backward compatibility
960 field_size_compat_map = {
752cda38 961 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
e6f21b3d 962 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
752cda38 963 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 964 }
752cda38 965
385a27fa 966 TMPL_DICT = {}
524e2e4f 967 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
385a27fa 968 MATH_FUNCTIONS = {
969 '+': float.__add__,
970 '-': float.__sub__,
971 }
e625be0d 972 # Field is of the form key1.key2...
973 # where keys (except first) can be string, int or slice
2b8a2973 974 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
385a27fa 975 MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
976 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 977 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
978 (?P<negate>-)?
385a27fa 979 (?P<fields>{field})
980 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 981 (?:>(?P<strf_format>.+?))?
7c37ff97 982 (?P<alternate>(?<!\\),[^|)]+)?
e625be0d 983 (?:\|(?P<default>.*?))?
385a27fa 984 $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
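        # Illustrative examples of templates this grammar accepts (the field
        # names are ordinary info_dict keys; the values are hypothetical):
        #   %(title)s                  plain field
        #   %(formats.0.height)s       object traversal with '.'
        #   %(playlist_index+10)03d    maths on numeric fields
        #   %(upload_date>%Y-%m-%d)s   datetime formatting after '>'
        #   %(title,id)s               fallback field after ',' if the first is missing
        #   %(uploader|unknown)s       literal default after '|'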
752cda38 985
2b8a2973 986 def _traverse_infodict(k):
987 k = k.split('.')
988 if k[0] == '':
989 k.pop(0)
990 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 991
752cda38 992 def get_value(mdict):
993 # Object traversal
2b8a2973 994 value = _traverse_infodict(mdict['fields'])
752cda38 995 # Negative
996 if mdict['negate']:
997 value = float_or_none(value)
998 if value is not None:
999 value *= -1
1000 # Do maths
385a27fa 1001 offset_key = mdict['maths']
1002 if offset_key:
752cda38 1003 value = float_or_none(value)
1004 operator = None
385a27fa 1005 while offset_key:
1006 item = re.match(
1007 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1008 offset_key).group(0)
1009 offset_key = offset_key[len(item):]
1010 if operator is None:
752cda38 1011 operator = MATH_FUNCTIONS[item]
385a27fa 1012 continue
1013 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1014 offset = float_or_none(item)
1015 if offset is None:
2b8a2973 1016 offset = float_or_none(_traverse_infodict(item))
385a27fa 1017 try:
1018 value = operator(value, multiplier * offset)
1019 except (TypeError, ZeroDivisionError):
1020 return None
1021 operator = None
752cda38 1022 # Datetime formatting
1023 if mdict['strf_format']:
7c37ff97 1024 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1025
1026 return value
1027
b868936c 1028 na = self.params.get('outtmpl_na_placeholder', 'NA')
1029
6e84b215 1030 def _dumpjson_default(obj):
1031 if isinstance(obj, (set, LazyList)):
1032 return list(obj)
1033 raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1034
752cda38 1035 def create_key(outer_mobj):
1036 if not outer_mobj.group('has_key'):
b836dc94 1037 return outer_mobj.group(0)
752cda38 1038 key = outer_mobj.group('key')
752cda38 1039 mobj = re.match(INTERNAL_FORMAT_RE, key)
7c37ff97 1040 initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1041 value, default = None, na
1042 while mobj:
e625be0d 1043 mobj = mobj.groupdict()
7c37ff97 1044 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1045 value = get_value(mobj)
7c37ff97 1046 if value is None and mobj['alternate']:
1047 mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1048 else:
1049 break
752cda38 1050
b868936c 1051 fmt = outer_mobj.group('format')
752cda38 1052 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1053 fmt = '0{:d}d'.format(field_size_compat_map[key])
1054
1055 value = default if value is None else value
752cda38 1056
7d1eb38a 1057 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1058 if fmt[-1] == 'l': # list
91dd88b9 1059 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1060 value, fmt = delim.join(variadic(value)), str_fmt
524e2e4f 1061 elif fmt[-1] == 'j': # json
6e84b215 1062 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
524e2e4f 1063 elif fmt[-1] == 'q': # quoted
7d1eb38a 1064 value, fmt = compat_shlex_quote(str(value)), str_fmt
524e2e4f 1065 elif fmt[-1] == 'B': # bytes
f5aa5cfb 1066 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1067 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1068 elif fmt[-1] == 'U': # unicode normalized
1069 opts = outer_mobj.group('conversion') or ''
1070 value, fmt = unicodedata.normalize(
1071 # "+" = compatibility equivalence, "#" = NFD
1072 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1073 value), str_fmt
7d1eb38a 1074 elif fmt[-1] == 'c':
524e2e4f 1075 if value:
1076 value = str(value)[0]
76a264ac 1077 else:
524e2e4f 1078 fmt = str_fmt
76a264ac 1079 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1080 value = float_or_none(value)
752cda38 1081 if value is None:
1082 value, fmt = default, 's'
901130bb 1083
752cda38 1084 if sanitize:
1085 if fmt[-1] == 'r':
1086 # If value is an object, sanitize might convert it to a string
1087 # So we convert it to repr first
7d1eb38a 1088 value, fmt = repr(value), str_fmt
639f1cea 1089 if fmt[-1] in 'csr':
7c37ff97 1090 value = sanitize(initial_field, value)
901130bb 1091
b868936c 1092 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1093 TMPL_DICT[key] = value
b868936c 1094 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1095
385a27fa 1096 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1097
819e0531 1098 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1099 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1100 return self.escape_outtmpl(outtmpl) % info_dict
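        # Illustrative only (hypothetical info_dict):
        #   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
        #                        {'title': 'Demo', 'id': 'abc123', 'ext': 'mp4'})
        #   -> 'Demo [abc123].mp4'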
1101
de6000d9 1102 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1103 try:
586a91b6 1104 sanitize = lambda k, v: sanitize_filename(
45598aab 1105 compat_str(v),
1bb5c511 1106 restricted=self.params.get('restrictfilenames'),
40df485f 1107 is_id=(k == 'id' or k.endswith('_id')))
b836dc94 1108 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1109 filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
15da37c7 1110
143db31d 1111 force_ext = OUTTMPL_TYPES.get(tmpl_type)
80c03fa9 1112 if filename and force_ext is not None:
752cda38 1113 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1114
bdc3fd2f
U
1115 # https://github.com/blackjack4494/youtube-dlc/issues/85
1116 trim_file_name = self.params.get('trim_file_name', False)
1117 if trim_file_name:
1118 fn_groups = filename.rsplit('.')
1119 ext = fn_groups[-1]
1120 sub_ext = ''
1121 if len(fn_groups) > 2:
1122 sub_ext = fn_groups[-2]
1123 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1124
0202b52a 1125 return filename
8222d8de 1126 except ValueError as err:
6febd1c1 1127 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1128 return None
1129
de6000d9 1130 def prepare_filename(self, info_dict, dir_type='', warn=False):
1131 """Generate the output filename."""
21cd8fae 1132
de6000d9 1133 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1134 if not filename and dir_type not in ('', 'temp'):
1135 return ''
de6000d9 1136
c84aeac6 1137 if warn:
21cd8fae 1138 if not self.params.get('paths'):
de6000d9 1139 pass
1140 elif filename == '-':
c84aeac6 1141    self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1142 elif os.path.isabs(filename):
c84aeac6 1143 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1144 if filename == '-' or not filename:
1145 return filename
1146
21cd8fae 1147 return self.get_output_path(dir_type, filename)
0202b52a 1148
120fe513 1149 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1150 """ Returns None if the file should be downloaded """
8222d8de 1151
c77495e3 1152 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1153
8b0d7497 1154 def check_filter():
8b0d7497 1155 if 'title' in info_dict:
1156 # This can happen when we're just evaluating the playlist
1157 title = info_dict['title']
1158 matchtitle = self.params.get('matchtitle', False)
1159 if matchtitle:
1160 if not re.search(matchtitle, title, re.IGNORECASE):
1161 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1162 rejecttitle = self.params.get('rejecttitle', False)
1163 if rejecttitle:
1164 if re.search(rejecttitle, title, re.IGNORECASE):
1165 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1166 date = info_dict.get('upload_date')
1167 if date is not None:
1168 dateRange = self.params.get('daterange', DateRange())
1169 if date not in dateRange:
1170 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1171 view_count = info_dict.get('view_count')
1172 if view_count is not None:
1173 min_views = self.params.get('min_views')
1174 if min_views is not None and view_count < min_views:
1175 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1176 max_views = self.params.get('max_views')
1177 if max_views is not None and view_count > max_views:
1178 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1179 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1180 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1181
8f18aca8 1182 match_filter = self.params.get('match_filter')
1183 if match_filter is not None:
1184 try:
1185 ret = match_filter(info_dict, incomplete=incomplete)
1186 except TypeError:
1187 # For backward compatibility
1188 ret = None if incomplete else match_filter(info_dict)
1189 if ret is not None:
1190 return ret
8b0d7497 1191 return None
1192
c77495e3 1193 if self.in_download_archive(info_dict):
1194 reason = '%s has already been recorded in the archive' % video_title
1195 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1196 else:
1197 reason = check_filter()
1198 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1199 if reason is not None:
120fe513 1200 if not silent:
1201 self.to_screen('[download] ' + reason)
c77495e3 1202 if self.params.get(break_opt, False):
1203 raise break_err()
8b0d7497 1204 return reason
fe7e0c98 1205
b6c45014
JMF
1206 @staticmethod
1207 def add_extra_info(info_dict, extra_info):
1208 '''Set the keys from extra_info in info dict if they are missing'''
1209 for key, value in extra_info.items():
1210 info_dict.setdefault(key, value)
1211
409e1828 1212 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1213 process=True, force_generic_extractor=False):
41d1cca3 1214 """
1215 Return a list with a dictionary for each video extracted.
1216
1217 Arguments:
1218 url -- URL to extract
1219
1220 Keyword arguments:
1221 download -- whether to download videos during extraction
1222 ie_key -- extractor key hint
1223 extra_info -- dictionary containing the extra values to add to each result
1224 process -- whether to resolve all unresolved references (URLs, playlist items),
1225 must be True for download to work.
1226 force_generic_extractor -- force using the generic extractor
1227 """
fe7e0c98 1228
409e1828 1229 if extra_info is None:
1230 extra_info = {}
1231
61aa5ba3 1232 if not ie_key and force_generic_extractor:
d22dec74
S
1233 ie_key = 'Generic'
1234
8222d8de 1235 if ie_key:
8b7491c8 1236 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1237 else:
1238 ies = self._ies
1239
8b7491c8 1240 for ie_key, ie in ies.items():
8222d8de
JMF
1241 if not ie.suitable(url):
1242 continue
1243
1244 if not ie.working():
6febd1c1
PH
1245 self.report_warning('The program functionality for this site has been marked as broken, '
1246 'and will probably not work.')
8222d8de 1247
1151c407 1248 temp_id = ie.get_temp_id(url)
a0566bbf 1249 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1250 self.to_screen("[%s] %s: has already been recorded in archive" % (
1251 ie_key, temp_id))
1252 break
8b7491c8 1253 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1254 else:
1255 self.report_error('no suitable InfoExtractor for URL %s' % url)
1256
8e5fecc8 1257 def __handle_extraction_exceptions(func):
b5ae35ee 1258 @functools.wraps(func)
a0566bbf 1259 def wrapper(self, *args, **kwargs):
1260 try:
1261 return func(self, *args, **kwargs)
773f291d
S
1262 except GeoRestrictedError as e:
1263 msg = e.msg
1264 if e.countries:
1265 msg += '\nThis video is available in %s.' % ', '.join(
1266 map(ISO3166Utils.short2full, e.countries))
1267 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1268 self.report_error(msg)
fb043a6e 1269 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1270 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1271 except ThrottledDownload:
1272 self.to_stderr('\r')
1273 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1274 return wrapper(self, *args, **kwargs)
8e5fecc8 1275 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
d3e5bbf4 1276 raise
8222d8de 1277 except Exception as e:
b1940459 1278 if self.params.get('ignoreerrors'):
9b9c5355 1279 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1280 else:
1281 raise
a0566bbf 1282 return wrapper
1283
1284 @__handle_extraction_exceptions
58f197b7 1285 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1286 ie_result = ie.extract(url)
1287 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1288 return
1289 if isinstance(ie_result, list):
1290 # Backwards compatibility: old IE result format
1291 ie_result = {
1292 '_type': 'compat_list',
1293 'entries': ie_result,
1294 }
e37d0efb 1295 if extra_info.get('original_url'):
1296 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1297 self.add_default_extra_info(ie_result, ie, url)
1298 if process:
1299 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1300 else:
a0566bbf 1301 return ie_result
fe7e0c98 1302
ea38e55f 1303 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1304 if url is not None:
1305 self.add_extra_info(ie_result, {
1306 'webpage_url': url,
1307 'original_url': url,
1308 'webpage_url_basename': url_basename(url),
1309 })
1310 if ie is not None:
1311 self.add_extra_info(ie_result, {
1312 'extractor': ie.IE_NAME,
1313 'extractor_key': ie.ie_key(),
1314 })
ea38e55f 1315
58adec46 1316 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1317 """
1318 Take the result of the ie (which may be modified) and resolve all unresolved
1319 references (URLs, playlist items).
1320
1321 It will also download the videos if 'download'.
1322 Returns the resolved ie_result.
1323 """
58adec46 1324 if extra_info is None:
1325 extra_info = {}
e8ee972c
PH
1326 result_type = ie_result.get('_type', 'video')
1327
057a5206 1328 if result_type in ('url', 'url_transparent'):
134c6ea8 1329 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1330 if ie_result.get('original_url'):
1331 extra_info.setdefault('original_url', ie_result['original_url'])
1332
057a5206 1333 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1334 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1335 or extract_flat is True):
ecb54191 1336 info_copy = ie_result.copy()
6033d980 1337 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1338 if ie and not ie_result.get('id'):
4614bc22 1339 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1340 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1341 self.add_extra_info(info_copy, extra_info)
ecb54191 1342 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1343 if self.params.get('force_write_download_archive', False):
1344 self.record_download_archive(info_copy)
e8ee972c
PH
1345 return ie_result
1346
8222d8de 1347 if result_type == 'video':
b6c45014 1348 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1349 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1350 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1351 if additional_urls:
e9f4ccd1 1352 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1353 if isinstance(additional_urls, compat_str):
1354 additional_urls = [additional_urls]
1355 self.to_screen(
1356 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1357 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1358 ie_result['additional_entries'] = [
1359 self.extract_info(
1360 url, download, extra_info,
1361 force_generic_extractor=self.params.get('force_generic_extractor'))
1362 for url in additional_urls
1363 ]
1364 return ie_result
8222d8de
JMF
1365 elif result_type == 'url':
1366 # We have to add extra_info to the results because it may be
1367 # contained in a playlist
07cce701 1368 return self.extract_info(
1369 ie_result['url'], download,
1370 ie_key=ie_result.get('ie_key'),
1371 extra_info=extra_info)
7fc3fa05
PH
1372 elif result_type == 'url_transparent':
1373 # Use the information from the embedding page
1374 info = self.extract_info(
1375 ie_result['url'], ie_key=ie_result.get('ie_key'),
1376 extra_info=extra_info, download=False, process=False)
1377
1640eb09
S
1378 # extract_info may return None when ignoreerrors is enabled and
1379 # extraction failed with an error, don't crash and return early
1380 # in this case
1381 if not info:
1382 return info
1383
412c617d
PH
1384 force_properties = dict(
1385 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1386 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1387 if f in force_properties:
1388 del force_properties[f]
1389 new_result = info.copy()
1390 new_result.update(force_properties)
7fc3fa05 1391
0563f7ac
S
1392 # Extracted info may not be a video result (i.e.
1393 # info.get('_type', 'video') != video) but rather a url or
1394 # url_transparent. In such cases, outer metadata (from ie_result)
1395 # should be propagated to the inner one (info). For this to happen,
1396 # _type of info should be overridden with url_transparent. This
067aa17e 1397 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1398 if new_result.get('_type') == 'url':
1399 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1400
1401 return self.process_ie_result(
1402 new_result, download=download, extra_info=extra_info)
40fcba5e 1403 elif result_type in ('playlist', 'multi_video'):
30a074c2 1404 # Protect from infinite recursion due to recursively nested playlists
1405 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1406 webpage_url = ie_result['webpage_url']
1407 if webpage_url in self._playlist_urls:
7e85e872 1408 self.to_screen(
30a074c2 1409 '[download] Skipping already downloaded playlist: %s'
1410 % (ie_result.get('title') or ie_result.get('id')))
1411 return
7e85e872 1412
30a074c2 1413 self._playlist_level += 1
1414 self._playlist_urls.add(webpage_url)
bc516a3f 1415 self._sanitize_thumbnails(ie_result)
30a074c2 1416 try:
1417 return self.__process_playlist(ie_result, download)
1418 finally:
1419 self._playlist_level -= 1
1420 if not self._playlist_level:
1421 self._playlist_urls.clear()
8222d8de 1422 elif result_type == 'compat_list':
c9bf4114
PH
1423 self.report_warning(
1424 'Extractor %s returned a compat_list result. '
1425 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1426
8222d8de 1427 def _fixup(r):
b868936c 1428 self.add_extra_info(r, {
1429 'extractor': ie_result['extractor'],
1430 'webpage_url': ie_result['webpage_url'],
1431 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1432 'extractor_key': ie_result['extractor_key'],
1433 })
8222d8de
JMF
1434 return r
1435 ie_result['entries'] = [
b6c45014 1436 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1437 for r in ie_result['entries']
1438 ]
1439 return ie_result
1440 else:
1441 raise Exception('Invalid result type: %s' % result_type)
1442
e92caff5 1443 def _ensure_dir_exists(self, path):
1444 return make_dir(path, self.report_error)
1445
30a074c2 1446 def __process_playlist(self, ie_result, download):
1447 # We process each entry in the playlist
1448 playlist = ie_result.get('title') or ie_result.get('id')
1449 self.to_screen('[download] Downloading playlist: %s' % playlist)
1450
498f5606 1451 if 'entries' not in ie_result:
1452 raise EntryNotInPlaylist()
1453 incomplete_entries = bool(ie_result.get('requested_entries'))
1454 if incomplete_entries:
1455 def fill_missing_entries(entries, indexes):
1456 ret = [None] * max(indexes)
1457 for i, entry in zip(indexes, entries):
1458 ret[i - 1] = entry
1459 return ret
1460 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1461
30a074c2 1462 playlist_results = []
1463
56a8fb4f 1464 playliststart = self.params.get('playliststart', 1)
30a074c2 1465 playlistend = self.params.get('playlistend')
1466 # For backwards compatibility, interpret -1 as whole list
1467 if playlistend == -1:
1468 playlistend = None
1469
1470 playlistitems_str = self.params.get('playlist_items')
1471 playlistitems = None
1472 if playlistitems_str is not None:
1473 def iter_playlistitems(format):
1474 for string_segment in format.split(','):
1475 if '-' in string_segment:
1476 start, end = string_segment.split('-')
1477 for item in range(int(start), int(end) + 1):
1478 yield int(item)
1479 else:
1480 yield int(string_segment)
1481 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
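# Illustrative example (not part of the original source): '--playlist-items 1-3,7,10-12'
# produces the ordered, de-duplicated indices [1, 2, 3, 7, 10, 11, 12].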
1482
1483 ie_entries = ie_result['entries']
56a8fb4f 1484 msg = (
1485 'Downloading %d videos' if not isinstance(ie_entries, list)
1486 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
8e5fecc8 1487
1488 if isinstance(ie_entries, list):
1489 def get_entry(i):
1490 return ie_entries[i - 1]
1491 else:
1492 if not isinstance(ie_entries, PagedList):
1493 ie_entries = LazyList(ie_entries)
1494
1495 def get_entry(i):
1496 return YoutubeDL.__handle_extraction_exceptions(
1497 lambda self, i: ie_entries[i - 1]
1498 )(self, i)
50fed816 1499
56a8fb4f 1500 entries = []
ff1c7fc9 1501 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1502 for i in items:
1503 if i == 0:
1504 continue
56a8fb4f 1505 if playlistitems is None and playlistend is not None and playlistend < i:
1506 break
1507 entry = None
1508 try:
50fed816 1509 entry = get_entry(i)
56a8fb4f 1510 if entry is None:
498f5606 1511 raise EntryNotInPlaylist()
56a8fb4f 1512 except (IndexError, EntryNotInPlaylist):
1513 if incomplete_entries:
1514 raise EntryNotInPlaylist()
1515 elif not playlistitems:
1516 break
1517 entries.append(entry)
120fe513 1518 try:
1519 if entry is not None:
1520 self._match_entry(entry, incomplete=True, silent=True)
1521 except (ExistingVideoReached, RejectedVideoReached):
1522 break
56a8fb4f 1523 ie_result['entries'] = entries
30a074c2 1524
56a8fb4f 1525 # Save playlist_index before re-ordering
1526 entries = [
9e598870 1527 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1528 for i, entry in enumerate(entries, 1)
1529 if entry is not None]
1530 n_entries = len(entries)
498f5606 1531
498f5606 1532 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1533 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1534 ie_result['requested_entries'] = playlistitems
1535
1536 if self.params.get('allow_playlist_files', True):
1537 ie_copy = {
1538 'playlist': playlist,
1539 'playlist_id': ie_result.get('id'),
1540 'playlist_title': ie_result.get('title'),
1541 'playlist_uploader': ie_result.get('uploader'),
1542 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1543 'playlist_index': 0,
498f5606 1544 }
1545 ie_copy.update(dict(ie_result))
1546
80c03fa9 1547 if self._write_info_json('playlist', ie_result,
1548 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1549 return
1550 if self._write_description('playlist', ie_result,
1551 self.prepare_filename(ie_copy, 'pl_description')) is None:
1552 return
681de68e 1553 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1554 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1555
1556 if self.params.get('playlistreverse', False):
1557 entries = entries[::-1]
30a074c2 1558 if self.params.get('playlistrandom', False):
1559 random.shuffle(entries)
1560
1561 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1562
56a8fb4f 1563 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1564 failures = 0
1565 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1566 for i, entry_tuple in enumerate(entries, 1):
1567 playlist_index, entry = entry_tuple
81139999 1568 if 'playlist-index' in self.params.get('compat_opts', []):
1569 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1570 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1571 # This __x_forwarded_for_ip thing is a bit ugly but requires
1572 # minimal changes
1573 if x_forwarded_for:
1574 entry['__x_forwarded_for_ip'] = x_forwarded_for
1575 extra = {
1576 'n_entries': n_entries,
f59ae581 1577 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1578 'playlist_index': playlist_index,
1579 'playlist_autonumber': i,
30a074c2 1580 'playlist': playlist,
1581 'playlist_id': ie_result.get('id'),
1582 'playlist_title': ie_result.get('title'),
1583 'playlist_uploader': ie_result.get('uploader'),
1584 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1585 'extractor': ie_result['extractor'],
1586 'webpage_url': ie_result['webpage_url'],
1587 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1588 'extractor_key': ie_result['extractor_key'],
1589 }
1590
1591 if self._match_entry(entry, incomplete=True) is not None:
1592 continue
1593
1594 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1595 if not entry_result:
1596 failures += 1
1597 if failures >= max_failures:
1598 self.report_error(
1599 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1600 break
30a074c2 1601 # TODO: skip failed (empty) entries?
1602 playlist_results.append(entry_result)
1603 ie_result['entries'] = playlist_results
1604 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1605 return ie_result
1606
a0566bbf 1607 @__handle_extraction_exceptions
1608 def __process_iterable_entry(self, entry, download, extra_info):
1609 return self.process_ie_result(
1610 entry, download=download, extra_info=extra_info)
1611
67134eab
JMF
1612 def _build_format_filter(self, filter_spec):
1613 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1614
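# Illustrative examples (not part of the original source): numeric specs such as
# 'height<=720' or 'filesize>100M' are matched by the regex below, while string
# specs such as 'ext=mp4' or the negated 'format_id!*=dash' fall through to the
# string-operator branch further down.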
1615 OPERATORS = {
1616 '<': operator.lt,
1617 '<=': operator.le,
1618 '>': operator.gt,
1619 '>=': operator.ge,
1620 '=': operator.eq,
1621 '!=': operator.ne,
1622 }
67134eab 1623 operator_rex = re.compile(r'''(?x)\s*
187986a8 1624 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1625 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1626 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1627 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1628 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1629 if m:
1630 try:
1631 comparison_value = int(m.group('value'))
1632 except ValueError:
1633 comparison_value = parse_filesize(m.group('value'))
1634 if comparison_value is None:
1635 comparison_value = parse_filesize(m.group('value') + 'B')
1636 if comparison_value is None:
1637 raise ValueError(
1638 'Invalid value %r in format specification %r' % (
67134eab 1639 m.group('value'), filter_spec))
9ddb6925
S
1640 op = OPERATORS[m.group('op')]
1641
083c9df9 1642 if not m:
9ddb6925
S
1643 STR_OPERATORS = {
1644 '=': operator.eq,
10d33b34
YCH
1645 '^=': lambda attr, value: attr.startswith(value),
1646 '$=': lambda attr, value: attr.endswith(value),
1647 '*=': lambda attr, value: value in attr,
9ddb6925 1648 }
187986a8 1649 str_operator_rex = re.compile(r'''(?x)\s*
1650 (?P<key>[a-zA-Z0-9._-]+)\s*
1651 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1652 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1653 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1654 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1655 if m:
1656 comparison_value = m.group('value')
2cc779f4
S
1657 str_op = STR_OPERATORS[m.group('op')]
1658 if m.group('negation'):
e118a879 1659 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1660 else:
1661 op = str_op
083c9df9 1662
9ddb6925 1663 if not m:
187986a8 1664 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1665
1666 def _filter(f):
1667 actual_value = f.get(m.group('key'))
1668 if actual_value is None:
1669 return m.group('none_inclusive')
1670 return op(actual_value, comparison_value)
67134eab
JMF
1671 return _filter
1672
0017d9ad 1673 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1674
af0f7428
S
1675 def can_merge():
1676 merger = FFmpegMergerPP(self)
1677 return merger.available and merger.can_merge()
1678
91ebc640 1679 prefer_best = (
b7b04c78 1680 not self.params.get('simulate')
91ebc640 1681 and download
1682 and (
1683 not can_merge()
19807826 1684 or info_dict.get('is_live', False)
de6000d9 1685 or self.outtmpl_dict['default'] == '-'))
53ed7066 1686 compat = (
1687 prefer_best
1688 or self.params.get('allow_multiple_audio_streams', False)
1689 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1690
1691 return (
53ed7066 1692 'best/bestvideo+bestaudio' if prefer_best
1693 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1694 else 'bestvideo+bestaudio/best')
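# Illustrative summary (not part of the original source): for a normal download with a
# working merger this resolves to 'bestvideo*+bestaudio/best'; when merging is not
# possible, the stream is live, or output goes to '-', it falls back to
# 'best/bestvideo+bestaudio'; the 'format-spec' compat option (or allowing multiple
# audio streams) yields 'bestvideo+bestaudio/best' instead.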
0017d9ad 1695
67134eab
JMF
1696 def build_format_selector(self, format_spec):
1697 def syntax_error(note, start):
1698 message = (
1699 'Invalid format specification: '
1700 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1701 return SyntaxError(message)
1702
1703 PICKFIRST = 'PICKFIRST'
1704 MERGE = 'MERGE'
1705 SINGLE = 'SINGLE'
0130afb7 1706 GROUP = 'GROUP'
67134eab
JMF
1707 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1708
91ebc640 1709 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1710 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1711
e8e73840 1712 check_formats = self.params.get('check_formats')
1713
67134eab
JMF
1714 def _parse_filter(tokens):
1715 filter_parts = []
1716 for type, string, start, _, _ in tokens:
1717 if type == tokenize.OP and string == ']':
1718 return ''.join(filter_parts)
1719 else:
1720 filter_parts.append(string)
1721
232541df 1722 def _remove_unused_ops(tokens):
17cc1534 1723 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1724 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1725 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1726 last_string, last_start, last_end, last_line = None, None, None, None
1727 for type, string, start, end, line in tokens:
1728 if type == tokenize.OP and string == '[':
1729 if last_string:
1730 yield tokenize.NAME, last_string, last_start, last_end, last_line
1731 last_string = None
1732 yield type, string, start, end, line
1733 # everything inside brackets will be handled by _parse_filter
1734 for type, string, start, end, line in tokens:
1735 yield type, string, start, end, line
1736 if type == tokenize.OP and string == ']':
1737 break
1738 elif type == tokenize.OP and string in ALLOWED_OPS:
1739 if last_string:
1740 yield tokenize.NAME, last_string, last_start, last_end, last_line
1741 last_string = None
1742 yield type, string, start, end, line
1743 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1744 if not last_string:
1745 last_string = string
1746 last_start = start
1747 last_end = end
1748 else:
1749 last_string += string
1750 if last_string:
1751 yield tokenize.NAME, last_string, last_start, last_end, last_line
1752
cf2ac6df 1753 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1754 selectors = []
1755 current_selector = None
1756 for type, string, start, _, _ in tokens:
1757 # ENCODING is only defined in python 3.x
1758 if type == getattr(tokenize, 'ENCODING', None):
1759 continue
1760 elif type in [tokenize.NAME, tokenize.NUMBER]:
1761 current_selector = FormatSelector(SINGLE, string, [])
1762 elif type == tokenize.OP:
cf2ac6df
JMF
1763 if string == ')':
1764 if not inside_group:
1765 # ')' will be handled by the parentheses group
1766 tokens.restore_last_token()
67134eab 1767 break
cf2ac6df 1768 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1769 tokens.restore_last_token()
1770 break
cf2ac6df
JMF
1771 elif inside_choice and string == ',':
1772 tokens.restore_last_token()
1773 break
1774 elif string == ',':
0a31a350
JMF
1775 if not current_selector:
1776 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1777 selectors.append(current_selector)
1778 current_selector = None
1779 elif string == '/':
d96d604e
JMF
1780 if not current_selector:
1781 raise syntax_error('"/" must follow a format selector', start)
67134eab 1782 first_choice = current_selector
cf2ac6df 1783 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1784 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1785 elif string == '[':
1786 if not current_selector:
1787 current_selector = FormatSelector(SINGLE, 'best', [])
1788 format_filter = _parse_filter(tokens)
1789 current_selector.filters.append(format_filter)
0130afb7
JMF
1790 elif string == '(':
1791 if current_selector:
1792 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1793 group = _parse_format_selection(tokens, inside_group=True)
1794 current_selector = FormatSelector(GROUP, group, [])
67134eab 1795 elif string == '+':
d03cfdce 1796 if not current_selector:
1797 raise syntax_error('Unexpected "+"', start)
1798 selector_1 = current_selector
1799 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1800 if not selector_2:
1801 raise syntax_error('Expected a selector', start)
1802 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1803 else:
1804 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1805 elif type == tokenize.ENDMARKER:
1806 break
1807 if current_selector:
1808 selectors.append(current_selector)
1809 return selectors
1810
f8d4ad9a 1811 def _merge(formats_pair):
1812 format_1, format_2 = formats_pair
1813
1814 formats_info = []
1815 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1816 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1817
1818 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1819 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1820 for (i, fmt_info) in enumerate(formats_info):
551f9388 1821 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1822 formats_info.pop(i)
1823 continue
1824 for aud_vid in ['audio', 'video']:
f8d4ad9a 1825 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1826 if get_no_more[aud_vid]:
1827 formats_info.pop(i)
f5510afe 1828 break
f8d4ad9a 1829 get_no_more[aud_vid] = True
1830
1831 if len(formats_info) == 1:
1832 return formats_info[0]
1833
1834 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1835 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1836
1837 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1838 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1839
1840 output_ext = self.params.get('merge_output_format')
1841 if not output_ext:
1842 if the_only_video:
1843 output_ext = the_only_video['ext']
1844 elif the_only_audio and not video_fmts:
1845 output_ext = the_only_audio['ext']
1846 else:
1847 output_ext = 'mkv'
1848
975a0d0d 1849 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1850
f8d4ad9a 1851 new_dict = {
1852 'requested_formats': formats_info,
975a0d0d 1853 'format': '+'.join(filtered('format')),
1854 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 1855 'ext': output_ext,
975a0d0d 1856 'protocol': '+'.join(map(determine_protocol, formats_info)),
1857 'language': '+'.join(orderedSet(filtered('language'))),
1858 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1859 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1860 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 1861 }
1862
1863 if the_only_video:
1864 new_dict.update({
1865 'width': the_only_video.get('width'),
1866 'height': the_only_video.get('height'),
1867 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1868 'fps': the_only_video.get('fps'),
1869 'vcodec': the_only_video.get('vcodec'),
1870 'vbr': the_only_video.get('vbr'),
1871 'stretched_ratio': the_only_video.get('stretched_ratio'),
1872 })
1873
1874 if the_only_audio:
1875 new_dict.update({
1876 'acodec': the_only_audio.get('acodec'),
1877 'abr': the_only_audio.get('abr'),
975a0d0d 1878 'asr': the_only_audio.get('asr'),
f8d4ad9a 1879 })
1880
1881 return new_dict
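# Illustrative note (not part of the original source): merging a video-only format
# with format_id '137' and an audio-only format '140' yields a synthetic entry whose
# format_id is '137+140', whose ext follows merge_output_format (or the fallbacks
# above), and whose tbr/filesize_approx are summed across the component formats.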
1882
e8e73840 1883 def _check_formats(formats):
981052c9 1884 if not check_formats:
1885 yield from formats
b5ac45b1 1886 return
e8e73840 1887 for f in formats:
1888 self.to_screen('[info] Testing format %s' % f['format_id'])
21cd8fae 1889 temp_file = tempfile.NamedTemporaryFile(
1890 suffix='.tmp', delete=False,
1891 dir=self.get_output_path('temp') or None)
1892 temp_file.close()
fe346461 1893 try:
981052c9 1894 success, _ = self.dl(temp_file.name, f, test=True)
1895 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1896 success = False
fe346461 1897 finally:
21cd8fae 1898 if os.path.exists(temp_file.name):
1899 try:
1900 os.remove(temp_file.name)
1901 except OSError:
1902 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
981052c9 1903 if success:
e8e73840 1904 yield f
1905 else:
1906 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1907
67134eab 1908 def _build_selector_function(selector):
909d24dd 1909 if isinstance(selector, list): # ,
67134eab
JMF
1910 fs = [_build_selector_function(s) for s in selector]
1911
317f7ab6 1912 def selector_function(ctx):
67134eab 1913 for f in fs:
981052c9 1914 yield from f(ctx)
67134eab 1915 return selector_function
909d24dd 1916
1917 elif selector.type == GROUP: # ()
0130afb7 1918 selector_function = _build_selector_function(selector.selector)
909d24dd 1919
1920 elif selector.type == PICKFIRST: # /
67134eab
JMF
1921 fs = [_build_selector_function(s) for s in selector.selector]
1922
317f7ab6 1923 def selector_function(ctx):
67134eab 1924 for f in fs:
317f7ab6 1925 picked_formats = list(f(ctx))
67134eab
JMF
1926 if picked_formats:
1927 return picked_formats
1928 return []
67134eab 1929
981052c9 1930 elif selector.type == MERGE: # +
1931 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1932
1933 def selector_function(ctx):
1934 for pair in itertools.product(
1935 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1936 yield _merge(pair)
1937
909d24dd 1938 elif selector.type == SINGLE: # atom
598d185d 1939 format_spec = selector.selector or 'best'
909d24dd 1940
f8d4ad9a 1941 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1942 if format_spec == 'all':
1943 def selector_function(ctx):
981052c9 1944 yield from _check_formats(ctx['formats'])
f8d4ad9a 1945 elif format_spec == 'mergeall':
1946 def selector_function(ctx):
981052c9 1947 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1948 if not formats:
1949 return
921b76ca 1950 merged_format = formats[-1]
1951 for f in formats[-2::-1]:
f8d4ad9a 1952 merged_format = _merge((merged_format, f))
1953 yield merged_format
909d24dd 1954
1955 else:
e8e73840 1956 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1957 mobj = re.match(
1958 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1959 format_spec)
1960 if mobj is not None:
1961 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1962 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1963 format_type = (mobj.group('type') or [None])[0]
1964 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1965 format_modified = mobj.group('mod') is not None
909d24dd 1966
1967 format_fallback = not format_type and not format_modified # for b, w
8326b00a 1968 _filter_f = (
eff63539 1969 (lambda f: f.get('%scodec' % format_type) != 'none')
1970 if format_type and format_modified # bv*, ba*, wv*, wa*
1971 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1972 if format_type # bv, ba, wv, wa
1973 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1974 if not format_modified # b, w
8326b00a 1975 else lambda f: True) # b*, w*
1976 filter_f = lambda f: _filter_f(f) and (
1977 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 1978 else:
b11c04a8 1979 if format_spec in ('m4a', 'mp3', 'ogg', 'aac'): # audio extension
1980 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
1981 elif format_spec in ('mp4', 'flv', 'webm', '3gp'): # video extension
1982 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
1983 elif format_spec in ('mhtml', ): # storyboards extension
1984 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
1985 else:
b5ae35ee 1986 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 1987
1988 def selector_function(ctx):
1989 formats = list(ctx['formats'])
909d24dd 1990 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1991 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1992 # for extractors with incomplete formats (audio-only (soundcloud)
1993 # or video-only (imgur)), best/worst will fall back to the
1994 # best/worst {video,audio}-only format
e8e73840 1995 matches = formats
981052c9 1996 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1997 try:
e8e73840 1998 yield matches[format_idx - 1]
981052c9 1999 except IndexError:
2000 return
083c9df9 2001
67134eab 2002 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2003
317f7ab6
S
2004 def final_selector(ctx):
2005 ctx_copy = copy.deepcopy(ctx)
67134eab 2006 for _filter in filters:
317f7ab6
S
2007 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2008 return selector_function(ctx_copy)
67134eab 2009 return final_selector
083c9df9 2010
67134eab 2011 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2012 try:
232541df 2013 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
2014 except tokenize.TokenError:
2015 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2016
2017 class TokenIterator(object):
2018 def __init__(self, tokens):
2019 self.tokens = tokens
2020 self.counter = 0
2021
2022 def __iter__(self):
2023 return self
2024
2025 def __next__(self):
2026 if self.counter >= len(self.tokens):
2027 raise StopIteration()
2028 value = self.tokens[self.counter]
2029 self.counter += 1
2030 return value
2031
2032 next = __next__
2033
2034 def restore_last_token(self):
2035 self.counter -= 1
2036
2037 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
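# Illustrative example (not part of the original source): a spec like 'bv*+ba/b'
# parses into a PICKFIRST selector whose first alternative is a MERGE of the 'bv*'
# and 'ba' atoms and whose fallback is the plain 'b' atom.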
67134eab 2038 return _build_selector_function(parsed_selector)
a9c58ad9 2039
e5660ee6
JMF
2040 def _calc_headers(self, info_dict):
2041 res = std_headers.copy()
2042
2043 add_headers = info_dict.get('http_headers')
2044 if add_headers:
2045 res.update(add_headers)
2046
2047 cookies = self._calc_cookies(info_dict)
2048 if cookies:
2049 res['Cookie'] = cookies
2050
0016b84e
S
2051 if 'X-Forwarded-For' not in res:
2052 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2053 if x_forwarded_for_ip:
2054 res['X-Forwarded-For'] = x_forwarded_for_ip
2055
e5660ee6
JMF
2056 return res
2057
2058 def _calc_cookies(self, info_dict):
5c2266df 2059 pr = sanitized_Request(info_dict['url'])
e5660ee6 2060 self.cookiejar.add_cookie_header(pr)
662435f7 2061 return pr.get_header('Cookie')
e5660ee6 2062
b0249bca 2063 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2064 thumbnails = info_dict.get('thumbnails')
2065 if thumbnails is None:
2066 thumbnail = info_dict.get('thumbnail')
2067 if thumbnail:
2068 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2069 if thumbnails:
2070 thumbnails.sort(key=lambda t: (
2071 t.get('preference') if t.get('preference') is not None else -1,
2072 t.get('width') if t.get('width') is not None else -1,
2073 t.get('height') if t.get('height') is not None else -1,
2074 t.get('id') if t.get('id') is not None else '',
2075 t.get('url')))
b0249bca 2076
0ba692ac 2077 def thumbnail_tester():
2078 if self.params.get('check_formats'):
cca80fe6 2079 test_all = True
2080 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 2081 else:
cca80fe6 2082 test_all = False
0ba692ac 2083 to_screen = self.write_debug
2084
2085 def test_thumbnail(t):
cca80fe6 2086 if not test_all and not t.get('_test_url'):
2087 return True
0ba692ac 2088 to_screen('Testing thumbnail %s' % t['id'])
2089 try:
2090 self.urlopen(HEADRequest(t['url']))
2091 except network_exceptions as err:
2092 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2093 t['id'], t['url'], error_to_compat_str(err)))
2094 return False
2095 return True
2096
2097 return test_thumbnail
b0249bca 2098
bc516a3f 2099 for i, t in enumerate(thumbnails):
bc516a3f 2100 if t.get('id') is None:
2101 t['id'] = '%d' % i
b0249bca 2102 if t.get('width') and t.get('height'):
2103 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2104 t['url'] = sanitize_url(t['url'])
0ba692ac 2105
2106 if self.params.get('check_formats') is not False:
2107 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2108 else:
2109 info_dict['thumbnails'] = thumbnails
bc516a3f 2110
dd82ffea
JMF
2111 def process_video_result(self, info_dict, download=True):
2112 assert info_dict.get('_type', 'video') == 'video'
2113
bec1fad2
PH
2114 if 'id' not in info_dict:
2115 raise ExtractorError('Missing "id" field in extractor result')
2116 if 'title' not in info_dict:
1151c407 2117 raise ExtractorError('Missing "title" field in extractor result',
2118 video_id=info_dict['id'], ie=info_dict['extractor'])
bec1fad2 2119
c9969434
S
2120 def report_force_conversion(field, field_not, conversion):
2121 self.report_warning(
2122 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2123 % (field, field_not, conversion))
2124
2125 def sanitize_string_field(info, string_field):
2126 field = info.get(string_field)
2127 if field is None or isinstance(field, compat_str):
2128 return
2129 report_force_conversion(string_field, 'a string', 'string')
2130 info[string_field] = compat_str(field)
2131
2132 def sanitize_numeric_fields(info):
2133 for numeric_field in self._NUMERIC_FIELDS:
2134 field = info.get(numeric_field)
2135 if field is None or isinstance(field, compat_numeric_types):
2136 continue
2137 report_force_conversion(numeric_field, 'numeric', 'int')
2138 info[numeric_field] = int_or_none(field)
2139
2140 sanitize_string_field(info_dict, 'id')
2141 sanitize_numeric_fields(info_dict)
be6217b2 2142
dd82ffea
JMF
2143 if 'playlist' not in info_dict:
2144 # It isn't part of a playlist
2145 info_dict['playlist'] = None
2146 info_dict['playlist_index'] = None
2147
bc516a3f 2148 self._sanitize_thumbnails(info_dict)
d5519808 2149
536a55da 2150 thumbnail = info_dict.get('thumbnail')
bc516a3f 2151 thumbnails = info_dict.get('thumbnails')
536a55da
S
2152 if thumbnail:
2153 info_dict['thumbnail'] = sanitize_url(thumbnail)
2154 elif thumbnails:
d5519808
PH
2155 info_dict['thumbnail'] = thumbnails[-1]['url']
2156
ae30b840 2157 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2158 info_dict['display_id'] = info_dict['id']
2159
10db0d2f 2160 for ts_key, date_key in (
2161 ('timestamp', 'upload_date'),
2162 ('release_timestamp', 'release_date'),
2163 ):
2164 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2165 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2166 # see http://bugs.python.org/issue1646728)
2167 try:
2168 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2169 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2170 except (ValueError, OverflowError, OSError):
2171 pass
9d2ecdbc 2172
ae30b840 2173 live_keys = ('is_live', 'was_live')
2174 live_status = info_dict.get('live_status')
2175 if live_status is None:
2176 for key in live_keys:
2177 if info_dict.get(key) is False:
2178 continue
2179 if info_dict.get(key):
2180 live_status = key
2181 break
2182 if all(info_dict.get(key) is False for key in live_keys):
2183 live_status = 'not_live'
2184 if live_status:
2185 info_dict['live_status'] = live_status
2186 for key in live_keys:
2187 if info_dict.get(key) is None:
2188 info_dict[key] = (live_status == key)
2189
33d2fc2f
S
2190 # Auto-generate title fields corresponding to the *_number fields when missing
2191 # in order to always have clean titles. This is very common for TV series.
2192 for field in ('chapter', 'season', 'episode'):
2193 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2194 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
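# Illustrative example (not part of the original source): an entry with
# episode_number=3 and no 'episode' field gets episode = 'Episode 3'.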
2195
05108a49
S
2196 for cc_kind in ('subtitles', 'automatic_captions'):
2197 cc = info_dict.get(cc_kind)
2198 if cc:
2199 for _, subtitle in cc.items():
2200 for subtitle_format in subtitle:
2201 if subtitle_format.get('url'):
2202 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2203 if subtitle_format.get('ext') is None:
2204 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2205
2206 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2207 subtitles = info_dict.get('subtitles')
4bba3716 2208
360e1ca5 2209 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2210 info_dict['id'], subtitles, automatic_captions)
a504ced0 2211
dd82ffea
JMF
2212 # We now pick which formats have to be downloaded
2213 if info_dict.get('formats') is None:
2214 # There's only one format available
2215 formats = [info_dict]
2216 else:
2217 formats = info_dict['formats']
2218
e0493e90 2219 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2220 if not self.params.get('allow_unplayable_formats'):
2221 formats = [f for f in formats if not f.get('has_drm')]
88acdbc2 2222
db95dc13 2223 if not formats:
1151c407 2224 self.raise_no_formats(info_dict)
db95dc13 2225
73af5cc8
S
2226 def is_wellformed(f):
2227 url = f.get('url')
a5ac0c47 2228 if not url:
73af5cc8
S
2229 self.report_warning(
2230 '"url" field is missing or empty - skipping format, '
2231 'there is an error in extractor')
a5ac0c47
S
2232 return False
2233 if isinstance(url, bytes):
2234 sanitize_string_field(f, 'url')
2235 return True
73af5cc8
S
2236
2237 # Filter out malformed formats for better extraction robustness
2238 formats = list(filter(is_wellformed, formats))
2239
181c7053
S
2240 formats_dict = {}
2241
dd82ffea 2242 # We check that all the formats have the format and format_id fields
db95dc13 2243 for i, format in enumerate(formats):
c9969434
S
2244 sanitize_string_field(format, 'format_id')
2245 sanitize_numeric_fields(format)
dcf77cf1 2246 format['url'] = sanitize_url(format['url'])
e74e3b63 2247 if not format.get('format_id'):
8016c922 2248 format['format_id'] = compat_str(i)
e2effb08
S
2249 else:
2250 # Sanitize format_id from characters used in format selector expression
ec85ded8 2251 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2252 format_id = format['format_id']
2253 if format_id not in formats_dict:
2254 formats_dict[format_id] = []
2255 formats_dict[format_id].append(format)
2256
2257 # Make sure all formats have unique format_id
2258 for format_id, ambiguous_formats in formats_dict.items():
2259 if len(ambiguous_formats) > 1:
2260 for i, format in enumerate(ambiguous_formats):
2261 format['format_id'] = '%s-%d' % (format_id, i)
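# Illustrative example (not part of the original source): two formats that both
# report format_id 'hls' are renamed to 'hls-0' and 'hls-1' so that format
# selectors remain unambiguous.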
2262
2263 for i, format in enumerate(formats):
8c51aa65 2264 if format.get('format') is None:
6febd1c1 2265 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2266 id=format['format_id'],
2267 res=self.format_resolution(format),
b868936c 2268 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2269 )
c1002e96 2270 # Automatically determine file extension if missing
5b1d8575 2271 if format.get('ext') is None:
cce929ea 2272 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2273 # Automatically determine protocol if missing (useful for format
2274 # selection purposes)
6f0be937 2275 if format.get('protocol') is None:
b5559424 2276 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2277 # Add HTTP headers, so that external programs can use them from the
2278 # json output
2279 full_format_info = info_dict.copy()
2280 full_format_info.update(format)
2281 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2282 # Remove private housekeeping stuff
2283 if '__x_forwarded_for_ip' in info_dict:
2284 del info_dict['__x_forwarded_for_ip']
dd82ffea 2285
4bcc7bd1 2286 # TODO Central sorting goes here
99e206d5 2287
88acdbc2 2288 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2289 # Only set the 'formats' field if the original info_dict lists them;
2290 # otherwise we end up with a circular reference: the first (and only)
f89197d7 2291 # element in the 'formats' field of info_dict would be info_dict itself,
dfb1b146 2292 # which can't be exported to json
b3d9ef88 2293 info_dict['formats'] = formats
4ec82a72 2294
2295 info_dict, _ = self.pre_process(info_dict)
2296
b7b04c78 2297 if self.params.get('list_thumbnails'):
2298 self.list_thumbnails(info_dict)
2299 if self.params.get('listformats'):
86c66b2d 2300 if not info_dict.get('formats') and not info_dict.get('url'):
88acdbc2 2301 self.to_screen('%s has no formats' % info_dict['id'])
2302 else:
2303 self.list_formats(info_dict)
b7b04c78 2304 if self.params.get('listsubtitles'):
2305 if 'automatic_captions' in info_dict:
2306 self.list_subtitles(
2307 info_dict['id'], automatic_captions, 'automatic captions')
2308 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2309 list_only = self.params.get('simulate') is None and (
2310 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
169dbde9 2311 if list_only:
b7b04c78 2312 # Without this printing, -F --print-json will not work
169dbde9 2313 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2314 return
2315
187986a8 2316 format_selector = self.format_selector
2317 if format_selector is None:
0017d9ad 2318 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2319 self.write_debug('Default format spec: %s' % req_format)
187986a8 2320 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2321
2322 # While in format selection we may need access to the original
2323 # format set in order to calculate some metrics or do some processing.
2324 # For now we need to be able to guess whether the original formats provided
2325 # by the extractor are incomplete (i.e. whether the extractor provides only
2326 # video-only or audio-only formats) so that format selection works properly
2327 # for extractors with such incomplete formats (see
067aa17e 2328 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2329 # Since formats may be filtered during format selection and may not match
2330 # the original formats, the results may be incorrect. Thus the original formats
2331 # or pre-calculated metrics should be passed to the format selection routines
2332 # as well.
2333 # We will pass a context object containing all necessary additional data
2334 # instead of just formats.
2335 # This fixes the incorrect format selection issue (see
067aa17e 2336 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2337 incomplete_formats = (
317f7ab6 2338 # All formats are video-only or
3089bc74 2339 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2340 # all formats are audio-only
3089bc74 2341 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2342
2343 ctx = {
2344 'formats': formats,
2345 'incomplete_formats': incomplete_formats,
2346 }
2347
2348 formats_to_download = list(format_selector(ctx))
dd82ffea 2349 if not formats_to_download:
b7da73eb 2350 if not self.params.get('ignore_no_formats_error'):
1151c407 2351 raise ExtractorError('Requested format is not available', expected=True,
2352 video_id=info_dict['id'], ie=info_dict['extractor'])
b7da73eb 2353 else:
2354 self.report_warning('Requested format is not available')
4513a41a
A
2355 # Process what we can, even without any available formats.
2356 self.process_info(dict(info_dict))
b7da73eb 2357 elif download:
2358 self.to_screen(
07cce701 2359 '[info] %s: Downloading %d format(s): %s' % (
2360 info_dict['id'], len(formats_to_download),
2361 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2362 for fmt in formats_to_download:
dd82ffea 2363 new_info = dict(info_dict)
4ec82a72 2364 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2365 new_info['__original_infodict'] = info_dict
b7da73eb 2366 new_info.update(fmt)
dd82ffea
JMF
2367 self.process_info(new_info)
2368 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2369 if formats_to_download:
2370 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2371 return info_dict
2372
98c70d6f 2373 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2374 """Select the requested subtitles and their format"""
98c70d6f
JMF
2375 available_subs = {}
2376 if normal_subtitles and self.params.get('writesubtitles'):
2377 available_subs.update(normal_subtitles)
2378 if automatic_captions and self.params.get('writeautomaticsub'):
2379 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2380 if lang not in available_subs:
2381 available_subs[lang] = cap_info
2382
4d171848
JMF
2383 if (not self.params.get('writesubtitles') and not
2384 self.params.get('writeautomaticsub') or not
2385 available_subs):
2386 return None
a504ced0 2387
c32b0aab 2388 all_sub_langs = available_subs.keys()
a504ced0 2389 if self.params.get('allsubtitles', False):
c32b0aab 2390 requested_langs = all_sub_langs
2391 elif self.params.get('subtitleslangs', False):
77c4a9ef 2392 # A list is used so that the order of languages will be the same as
2393 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2394 requested_langs = []
2395 for lang_re in self.params.get('subtitleslangs'):
2396 if lang_re == 'all':
2397 requested_langs.extend(all_sub_langs)
c32b0aab 2398 continue
77c4a9ef 2399 discard = lang_re[0] == '-'
c32b0aab 2400 if discard:
77c4a9ef 2401 lang_re = lang_re[1:]
2402 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2403 if discard:
2404 for lang in current_langs:
77c4a9ef 2405 while lang in requested_langs:
2406 requested_langs.remove(lang)
c32b0aab 2407 else:
77c4a9ef 2408 requested_langs.extend(current_langs)
2409 requested_langs = orderedSet(requested_langs)
c32b0aab 2410 elif 'en' in available_subs:
2411 requested_langs = ['en']
a504ced0 2412 else:
c32b0aab 2413 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2414 if requested_langs:
2415 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
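# Illustrative example (not part of the original source): with --sub-langs "en.*,-en-GB"
# every available language matching the regex 'en.*' is requested except 'en-GB',
# because entries prefixed with '-' are treated as discards by the loop above.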
a504ced0
JMF
2416
2417 formats_query = self.params.get('subtitlesformat', 'best')
2418 formats_preference = formats_query.split('/') if formats_query else []
2419 subs = {}
2420 for lang in requested_langs:
2421 formats = available_subs.get(lang)
2422 if formats is None:
2423 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2424 continue
a504ced0
JMF
2425 for ext in formats_preference:
2426 if ext == 'best':
2427 f = formats[-1]
2428 break
2429 matches = list(filter(lambda f: f['ext'] == ext, formats))
2430 if matches:
2431 f = matches[-1]
2432 break
2433 else:
2434 f = formats[-1]
2435 self.report_warning(
2436 'No subtitle format found matching "%s" for language %s, '
2437 'using %s' % (formats_query, lang, f['ext']))
2438 subs[lang] = f
2439 return subs
2440
d06daf23 2441 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2442 def print_mandatory(field, actual_field=None):
2443 if actual_field is None:
2444 actual_field = field
d06daf23 2445 if (self.params.get('force%s' % field, False)
53c18592 2446 and (not incomplete or info_dict.get(actual_field) is not None)):
2447 self.to_stdout(info_dict[actual_field])
d06daf23
S
2448
2449 def print_optional(field):
2450 if (self.params.get('force%s' % field, False)
2451 and info_dict.get(field) is not None):
2452 self.to_stdout(info_dict[field])
2453
53c18592 2454 info_dict = info_dict.copy()
2455 if filename is not None:
2456 info_dict['filename'] = filename
2457 if info_dict.get('requested_formats') is not None:
2458 # For RTMP URLs, also include the playpath
2459 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2460 elif 'url' in info_dict:
2461 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2462
2b8a2973 2463 if self.params.get('forceprint') or self.params.get('forcejson'):
2464 self.post_extract(info_dict)
53c18592 2465 for tmpl in self.params.get('forceprint', []):
b5ae35ee 2466 mobj = re.match(r'\w+(=?)$', tmpl)
2467 if mobj and mobj.group(1):
2468 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2469 elif mobj:
2470 tmpl = '%({})s'.format(tmpl)
2471 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
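# Illustrative examples (not part of the original source): a bare field name such as
# 'title' is expanded to the template '%(title)s', 'id=' becomes 'id = %(id)s', and
# anything else (e.g. a full output template) is evaluated as-is.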
53c18592 2472
d06daf23
S
2473 print_mandatory('title')
2474 print_mandatory('id')
53c18592 2475 print_mandatory('url', 'urls')
d06daf23
S
2476 print_optional('thumbnail')
2477 print_optional('description')
53c18592 2478 print_optional('filename')
b868936c 2479 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2480 self.to_stdout(formatSeconds(info_dict['duration']))
2481 print_mandatory('format')
53c18592 2482
2b8a2973 2483 if self.params.get('forcejson'):
6e84b215 2484 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2485
e8e73840 2486 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2487 if not info.get('url'):
1151c407 2488 self.raise_no_formats(info, True)
e8e73840 2489
2490 if test:
2491 verbose = self.params.get('verbose')
2492 params = {
2493 'test': True,
a169858f 2494 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2495 'verbose': verbose,
2496 'noprogress': not verbose,
2497 'nopart': True,
2498 'skip_unavailable_fragments': False,
2499 'keep_fragments': False,
2500 'overwrites': True,
2501 '_no_ytdl_file': True,
2502 }
2503 else:
2504 params = self.params
96fccc10 2505 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2506 if not test:
2507 for ph in self._progress_hooks:
2508 fd.add_progress_hook(ph)
18e674b4 2509 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2510 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2511 new_info = dict(info)
2512 if new_info.get('http_headers') is None:
2513 new_info['http_headers'] = self._calc_headers(new_info)
2514 return fd.download(name, new_info, subtitle)
2515
8222d8de
JMF
2516 def process_info(self, info_dict):
2517 """Process a single resolved IE result."""
2518
2519 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
2520
2521 max_downloads = self.params.get('max_downloads')
2522 if max_downloads is not None:
2523 if self._num_downloads >= int(max_downloads):
2524 raise MaxDownloadsReached()
8222d8de 2525
d06daf23 2526 # TODO: backward compatibility, to be removed
8222d8de 2527 info_dict['fulltitle'] = info_dict['title']
8222d8de 2528
4513a41a 2529 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2530 info_dict['format'] = info_dict['ext']
2531
c77495e3 2532 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2533 return
2534
277d6ff5 2535 self.post_extract(info_dict)
fd288278 2536 self._num_downloads += 1
8222d8de 2537
dcf64d43 2538 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2539 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2540 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2541 files_to_move = {}
8222d8de
JMF
2542
2543 # Forced printings
4513a41a 2544 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2545
b7b04c78 2546 if self.params.get('simulate'):
2d30509f 2547 if self.params.get('force_write_download_archive', False):
2548 self.record_download_archive(info_dict)
2d30509f 2549 # Do nothing else if in simulate mode
8222d8de
JMF
2550 return
2551
de6000d9 2552 if full_filename is None:
8222d8de 2553 return
e92caff5 2554 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2555 return
e92caff5 2556 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2557 return
2558
80c03fa9 2559 if self._write_description('video', info_dict,
2560 self.prepare_filename(info_dict, 'description')) is None:
2561 return
2562
2563 sub_files = self._write_subtitles(info_dict, temp_filename)
2564 if sub_files is None:
2565 return
2566 files_to_move.update(dict(sub_files))
2567
2568 thumb_files = self._write_thumbnails(
2569 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2570 if thumb_files is None:
2571 return
2572 files_to_move.update(dict(thumb_files))
8222d8de 2573
80c03fa9 2574 infofn = self.prepare_filename(info_dict, 'infojson')
2575 _infojson_written = self._write_info_json('video', info_dict, infofn)
2576 if _infojson_written:
2577 info_dict['__infojson_filename'] = infofn
2578 elif _infojson_written is None:
2579 return
2580
2581 # Note: Annotations are deprecated
2582 annofn = None
1fb07d10 2583 if self.params.get('writeannotations', False):
de6000d9 2584 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2585 if annofn:
e92caff5 2586 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2587 return
0c3d0f51 2588 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2589 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2590 elif not info_dict.get('annotations'):
2591 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2592 else:
2593 try:
6febd1c1 2594 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2595 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2596 annofile.write(info_dict['annotations'])
2597 except (KeyError, TypeError):
6febd1c1 2598 self.report_warning('There are no annotations to write.')
7b6fefc9 2599 except (OSError, IOError):
6febd1c1 2600 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2601 return
1fb07d10 2602
732044af 2603 # Write internet shortcut files
2604 url_link = webloc_link = desktop_link = False
2605 if self.params.get('writelink', False):
2606 if sys.platform == "darwin": # macOS.
2607 webloc_link = True
2608 elif sys.platform.startswith("linux"):
2609 desktop_link = True
2610 else: # if sys.platform in ['win32', 'cygwin']:
2611 url_link = True
2612 if self.params.get('writeurllink', False):
2613 url_link = True
2614 if self.params.get('writewebloclink', False):
2615 webloc_link = True
2616 if self.params.get('writedesktoplink', False):
2617 desktop_link = True
2618
2619 if url_link or webloc_link or desktop_link:
2620 if 'webpage_url' not in info_dict:
2621 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2622 return
2623 ascii_url = iri_to_uri(info_dict['webpage_url'])
2624
2625 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2626 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2627 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2628 self.to_screen('[info] Internet shortcut is already present')
2629 else:
2630 try:
2631 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2632 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2633 template_vars = {'url': ascii_url}
2634 if embed_filename:
2635 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2636 linkfile.write(template % template_vars)
2637 except (OSError, IOError):
2638 self.report_error('Cannot write internet shortcut ' + linkfn)
2639 return False
2640 return True
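# Illustrative sketch (hypothetical template and URL): how _write_link_file fills a
# template with old-style %-formatting. The real DOT_*_LINK_TEMPLATE strings live in
# yt_dlp.utils; the one below is only a simplified stand-in for a Windows .url file.
#
#     _EXAMPLE_URL_TEMPLATE = '[InternetShortcut]\nURL=%(url)s\n'
#     print(_EXAMPLE_URL_TEMPLATE % {'url': 'https://example.com/watch?v=xyz'})
#     # [InternetShortcut]
#     # URL=https://example.com/watch?v=xyz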
2641
2642 if url_link:
2643 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2644 return
2645 if webloc_link:
2646 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2647 return
2648 if desktop_link:
2649 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2650 return
2651
56d868db 2652 try:
2653 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2654 except PostProcessingError as err:
2655 self.report_error('Preprocessing: %s' % str(err))
2656 return
2657
732044af 2658 must_record_download_archive = False
56d868db 2659 if self.params.get('skip_download', False):
2660 info_dict['filepath'] = temp_filename
2661 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2662 info_dict['__files_to_move'] = files_to_move
2663 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2664 else:
2665 # Download
b868936c 2666 info_dict.setdefault('__postprocessors', [])
4340deca 2667 try:
0202b52a 2668
6b591b29 2669 def existing_file(*filepaths):
2670 ext = info_dict.get('ext')
2671 final_ext = self.params.get('final_ext', ext)
2672 existing_files = []
2673 for file in orderedSet(filepaths):
2674 if final_ext != ext:
2675 converted = replace_extension(file, final_ext, ext)
2676 if os.path.exists(encodeFilename(converted)):
2677 existing_files.append(converted)
2678 if os.path.exists(encodeFilename(file)):
2679 existing_files.append(file)
2680
2681 if not existing_files or self.params.get('overwrites', False):
2682 for file in orderedSet(existing_files):
2683 self.report_file_delete(file)
2684 os.remove(encodeFilename(file))
2685 return None
2686
6b591b29 2687 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2688 return existing_files[0]
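# Illustrative sketch (hypothetical filenames): with ext == 'webm' and
# final_ext == 'mp3' (e.g. when a conversion postprocessor is configured),
# existing_file('x.webm', 'tmp/x.webm') returns 'x.mp3' if that already-converted
# file is on disk (and sets info_dict['ext'] accordingly); when the 'overwrites'
# param is set, it instead deletes any leftovers and returns None so the
# download is redone.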
0202b52a 2689
2690 success = True
4340deca 2691 if info_dict.get('requested_formats') is not None:
81cd954a S 2692
2693 def compatible_formats(formats):
d03cfdce 2694 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2695 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2696 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2697 if len(video_formats) > 2 or len(audio_formats) > 2:
2698 return False
2699
81cd954a 2700 # Check extension
d03cfdce 2701 exts = set(format.get('ext') for format in formats)
2702 COMPATIBLE_EXTS = (
2703 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2704 set(('webm',)),
2705 )
2706 for ext_sets in COMPATIBLE_EXTS:
2707 if ext_sets.issuperset(exts):
2708 return True
81cd954a S 2709 # TODO: Check acodec/vcodec
2710 return False
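# Illustrative sketch (hypothetical format dicts): what compatible_formats()
# returns for a couple of video+audio pairs, per the extension sets above.
#
#     compatible_formats([{'vcodec': 'avc1', 'acodec': 'none', 'ext': 'mp4'},
#                         {'vcodec': 'none', 'acodec': 'mp4a', 'ext': 'm4a'}])   # True  -> container kept
#     compatible_formats([{'vcodec': 'vp9', 'acodec': 'none', 'ext': 'webm'},
#                         {'vcodec': 'none', 'acodec': 'mp4a', 'ext': 'm4a'}])   # False -> merged into mkv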
2711
2712 requested_formats = info_dict['requested_formats']
0202b52a 2713 old_ext = info_dict['ext']
4e3b637d 2714 if self.params.get('merge_output_format') is None:
2715 if not compatible_formats(requested_formats):
2716 info_dict['ext'] = 'mkv'
2717 self.report_warning(
2718 'Requested formats are incompatible for merge and will be merged into mkv')
2719 if (info_dict['ext'] == 'webm'
2720 and info_dict.get('thumbnails')
2721 # check with type instead of pp_key, __name__, or isinstance
2722 # since we don't want any custom PPs to trigger this
2723 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2724 info_dict['ext'] = 'mkv'
2725 self.report_warning(
2726 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 2727 new_ext = info_dict['ext']
0202b52a 2728
124bc071 2729 def correct_ext(filename, ext=new_ext):
96fccc10 2730 if filename == '-':
2731 return filename
0202b52a 2732 filename_real_ext = os.path.splitext(filename)[1][1:]
2733 filename_wo_ext = (
2734 os.path.splitext(filename)[0]
124bc071 2735 if filename_real_ext in (old_ext, new_ext)
0202b52a 2736 else filename)
124bc071 2737 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 2738
38c6902b 2739 # Ensure filename always has a correct extension for successful merge
0202b52a 2740 full_filename = correct_ext(full_filename)
2741 temp_filename = correct_ext(temp_filename)
2742 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2743 info_dict['__real_download'] = False
18e674b4 2744
dbf5416a 2745 if dl_filename is not None:
6c7274ec 2746 self.report_file_already_downloaded(dl_filename)
c111cefa 2747 elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
dbf5416a 2748 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2749 success, real_download = self.dl(temp_filename, info_dict)
2750 info_dict['__real_download'] = real_download
18e674b4 2751 else:
2752 downloaded = []
2753 merger = FFmpegMergerPP(self)
2754 if self.params.get('allow_unplayable_formats'):
2755 self.report_warning(
2756 'You have requested merging of multiple formats '
2757 'while also allowing unplayable formats to be downloaded. '
2758 'The formats won\'t be merged to prevent data corruption.')
2759 elif not merger.available:
2760 self.report_warning(
2761 'You have requested merging of multiple formats but ffmpeg is not installed. '
2762 'The formats won\'t be merged.')
2763
96fccc10 2764 if temp_filename == '-':
c111cefa 2765 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
96fccc10 2766 else 'but the formats are incompatible for simultaneous download' if merger.available
2767 else 'but ffmpeg is not installed')
2768 self.report_warning(
2769 f'You have requested downloading multiple formats to stdout {reason}. '
2770 'The formats will be streamed one after the other')
2771 fname = temp_filename
dbf5416a 2772 for f in requested_formats:
2773 new_info = dict(info_dict)
2774 del new_info['requested_formats']
2775 new_info.update(f)
96fccc10 2776 if temp_filename != '-':
124bc071 2777 fname = prepend_extension(
2778 correct_ext(temp_filename, new_info['ext']),
2779 'f%s' % f['format_id'], new_info['ext'])
96fccc10 2780 if not self._ensure_dir_exists(fname):
2781 return
a21e0ab1 2782 f['filepath'] = fname
96fccc10 2783 downloaded.append(fname)
dbf5416a 2784 partial_success, real_download = self.dl(fname, new_info)
2785 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2786 success = success and partial_success
2787 if merger.available and not self.params.get('allow_unplayable_formats'):
2788 info_dict['__postprocessors'].append(merger)
2789 info_dict['__files_to_merge'] = downloaded
2790 # Even if no file was actually downloaded, the merge itself only happens now
2791 info_dict['__real_download'] = True
2792 else:
2793 for file in downloaded:
2794 files_to_move[file] = None
4340deca P 2795 else:
2796 # Just a single file
0202b52a 2797 dl_filename = existing_file(full_filename, temp_filename)
6c7274ec 2798 if dl_filename is None or dl_filename == temp_filename:
2799 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2800 # So we should try to resume the download
e8e73840 2801 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2802 info_dict['__real_download'] = real_download
6c7274ec 2803 else:
2804 self.report_file_already_downloaded(dl_filename)
0202b52a 2805
0202b52a 2806 dl_filename = dl_filename or temp_filename
c571435f 2807 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2808
3158150c 2809 except network_exceptions as err:
7960b056 2810 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca P 2811 return
2812 except (OSError, IOError) as err:
2813 raise UnavailableVideoError(err)
2814 except (ContentTooShortError, ) as err:
2815 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2816 return
8222d8de 2817
de6000d9 2818 if success and full_filename != '-':
f17f8651 2819
fd7cfb64 2820 def fixup():
2821 do_fixup = True
2822 fixup_policy = self.params.get('fixup')
2823 vid = info_dict['id']
2824
2825 if fixup_policy in ('ignore', 'never'):
2826 return
2827 elif fixup_policy == 'warn':
2828 do_fixup = False
f89b3e2d 2829 elif fixup_policy != 'force':
2830 assert fixup_policy in ('detect_or_warn', None)
2831 if not info_dict.get('__real_download'):
2832 do_fixup = False
fd7cfb64 2833
2834 def ffmpeg_fixup(cndn, msg, cls):
2835 if not cndn:
2836 return
2837 if not do_fixup:
2838 self.report_warning(f'{vid}: {msg}')
2839 return
2840 pp = cls(self)
2841 if pp.available:
2842 info_dict['__postprocessors'].append(pp)
2843 else:
2844 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2845
2846 stretched_ratio = info_dict.get('stretched_ratio')
2847 ffmpeg_fixup(
2848 stretched_ratio not in (1, None),
2849 f'Non-uniform pixel ratio {stretched_ratio}',
2850 FFmpegFixupStretchedPP)
2851
2852 ffmpeg_fixup(
2853 (info_dict.get('requested_formats') is None
2854 and info_dict.get('container') == 'm4a_dash'
2855 and info_dict.get('ext') == 'm4a'),
2856 'writing DASH m4a. Only some players support this container',
2857 FFmpegFixupM4aPP)
2858
2859 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2860 if 'protocol' in info_dict else None)
84726743 2861 ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2862 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2863 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2864 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2865
2866 fixup()
8222d8de 2867 try:
23c1a667 2868 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2869 except PostProcessingError as err:
2870 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2871 return
ab8e5e51 AM 2872 try:
2873 for ph in self._post_hooks:
23c1a667 2874 ph(info_dict['filepath'])
ab8e5e51 AM 2875 except Exception as err:
2876 self.report_error('post hooks: %s' % str(err))
2877 return
2d30509f 2878 must_record_download_archive = True
2879
2880 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2881 self.record_download_archive(info_dict)
c3e6ffba 2882 max_downloads = self.params.get('max_downloads')
2883 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2884 raise MaxDownloadsReached()
8222d8de JMF 2885
2886 def download(self, url_list):
2887 """Download a given list of URLs."""
de6000d9 2888 outtmpl = self.outtmpl_dict['default']
3089bc74 S 2889 if (len(url_list) > 1
2890 and outtmpl != '-'
2891 and '%' not in outtmpl
2892 and self.params.get('max_downloads') != 1):
acd69589 2893 raise SameFileError(outtmpl)
8222d8de JMF 2894
2895 for url in url_list:
2896 try:
5f6a1245 2897 # It also downloads the videos
61aa5ba3 S 2898 res = self.extract_info(
2899 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2900 except UnavailableVideoError:
6febd1c1 2901 self.report_error('unable to download video')
8222d8de 2902 except MaxDownloadsReached:
8f18aca8 2903 self.to_screen('[info] Maximum number of downloads reached')
8b0d7497 2904 raise
2905 except ExistingVideoReached:
8f18aca8 2906 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2907 raise
2908 except RejectedVideoReached:
8f18aca8 2909 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
8222d8de 2910 raise
63e0be34 PH 2911 else:
2912 if self.params.get('dump_single_json', False):
277d6ff5 2913 self.post_extract(res)
6e84b215 2914 self.to_stdout(json.dumps(self.sanitize_info(res)))
8222d8de JMF 2915
2916 return self._download_retcode
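# Illustrative sketch (example options and URL): minimal embedding use of
# download(); the options shown are examples, not required settings.
#
#     from yt_dlp import YoutubeDL
#
#     with YoutubeDL({'format': 'bv*+ba/b', 'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#         retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])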
2917
1dcc4c0c 2918 def download_with_info_file(self, info_filename):
31bd3925 JMF 2919 with contextlib.closing(fileinput.FileInput(
2920 [info_filename], mode='r',
2921 openhook=fileinput.hook_encoded('utf-8'))) as f:
2922 # FileInput doesn't have a read method, so we can't call json.load
8012d892 2923 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 JMF 2924 try:
2925 self.process_ie_result(info, download=True)
d3f62c19 2926 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898 JMF 2927 webpage_url = info.get('webpage_url')
2928 if webpage_url is not None:
6febd1c1 2929 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898 JMF 2930 return self.download([webpage_url])
2931 else:
2932 raise
2933 return self._download_retcode
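# Illustrative sketch (example filename): re-running a download from a previously
# written .info.json, the programmatic counterpart of --load-info-json.
#
#     from yt_dlp import YoutubeDL
#
#     with YoutubeDL() as ydl:
#         ydl.download_with_info_file('My Video.info.json')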
1dcc4c0c 2934
cb202fd2 2935 @staticmethod
8012d892 2936 def sanitize_info(info_dict, remove_private_keys=False):
2937 ''' Sanitize the infodict for converting to json '''
3ad56b42 2938 if info_dict is None:
2939 return info_dict
6e84b215 2940 info_dict.setdefault('epoch', int(time.time()))
2941 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
ae8f99e6 2942 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
8012d892 2943 if remove_private_keys:
6e84b215 2944 remove_keys |= {
2945 'requested_formats', 'requested_subtitles', 'requested_entries',
2946 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2947 }
ae8f99e6 2948 empty_values = (None, {}, [], set(), tuple())
2949 reject = lambda k, v: k not in keep_keys and (
2950 k.startswith('_') or k in remove_keys or v in empty_values)
2951 else:
ae8f99e6 2952 reject = lambda k, v: k in remove_keys
5226731e 2953 filter_fn = lambda obj: (
b0249bca 2954 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2955 else obj if not isinstance(obj, dict)
ae8f99e6 2956 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2957 return filter_fn(info_dict)
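# Illustrative sketch (example URL): producing the same kind of cleaned JSON as
# --dump-json from an embedding script.
#
#     import json
#     from yt_dlp import YoutubeDL
#
#     with YoutubeDL() as ydl:
#         info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
#         print(json.dumps(ydl.sanitize_info(info, remove_private_keys=True)))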
cb202fd2 2958
8012d892 2959 @staticmethod
2960 def filter_requested_info(info_dict, actually_filter=True):
2961 ''' Alias of sanitize_info for backward compatibility '''
2962 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2963
dcf64d43 2964 def run_pp(self, pp, infodict):
5bfa4862 2965 files_to_delete = []
dcf64d43 2966 if '__files_to_move' not in infodict:
2967 infodict['__files_to_move'] = {}
b1940459 2968 try:
2969 files_to_delete, infodict = pp.run(infodict)
2970 except PostProcessingError as e:
2971 # Must be True and not 'only_download'
2972 if self.params.get('ignoreerrors') is True:
2973 self.report_error(e)
2974 return infodict
2975 raise
2976
5bfa4862 2977 if not files_to_delete:
dcf64d43 2978 return infodict
5bfa4862 2979 if self.params.get('keepvideo', False):
2980 for f in files_to_delete:
dcf64d43 2981 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2982 else:
2983 for old_filename in set(files_to_delete):
2984 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2985 try:
2986 os.remove(encodeFilename(old_filename))
2987 except (IOError, OSError):
2988 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2989 if old_filename in infodict['__files_to_move']:
2990 del infodict['__files_to_move'][old_filename]
2991 return infodict
5bfa4862 2992
277d6ff5 2993 @staticmethod
2994 def post_extract(info_dict):
2995 def actual_post_extract(info_dict):
2996 if info_dict.get('_type') in ('playlist', 'multi_video'):
2997 for video_dict in info_dict.get('entries', {}):
b050d210 2998 actual_post_extract(video_dict or {})
277d6ff5 2999 return
3000
07cce701 3001 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 3002 extra = post_extractor().items()
3003 info_dict.update(extra)
07cce701 3004 info_dict.pop('__post_extractor', None)
277d6ff5 3005
4ec82a72 3006 original_infodict = info_dict.get('__original_infodict') or {}
3007 original_infodict.update(extra)
3008 original_infodict.pop('__post_extractor', None)
3009
b050d210 3010 actual_post_extract(info_dict or {})
277d6ff5 3011
56d868db 3012 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3013 info = dict(ie_info)
56d868db 3014 info['__files_to_move'] = files_to_move or {}
3015 for pp in self._pps[key]:
dcf64d43 3016 info = self.run_pp(pp, info)
56d868db 3017 return info, info.pop('__files_to_move', None)
5bfa4862 3018
dcf64d43 3019 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de JMF 3020 """Run all the postprocessors on the given file."""
3021 info = dict(ie_info)
3022 info['filepath'] = filename
dcf64d43 3023 info['__files_to_move'] = files_to_move or {}
0202b52a 3024
56d868db 3025 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 3026 info = self.run_pp(pp, info)
3027 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3028 del info['__files_to_move']
56d868db 3029 for pp in self._pps['after_move']:
dcf64d43 3030 info = self.run_pp(pp, info)
23c1a667 3031 return info
c1c9a79c 3032
5db07df6 3033 def _make_archive_id(self, info_dict):
e9fef7ee S 3034 video_id = info_dict.get('id')
3035 if not video_id:
3036 return
5db07df6 PH 3037 # Future-proof against any change in case
3038 # and backwards compatibility with prior versions
e9fef7ee 3039 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3040 if extractor is None:
1211bb6d S 3041 url = str_or_none(info_dict.get('url'))
3042 if not url:
3043 return
e9fef7ee 3044 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3045 for ie_key, ie in self._ies.items():
1211bb6d 3046 if ie.suitable(url):
8b7491c8 3047 extractor = ie_key
e9fef7ee S 3048 break
3049 else:
3050 return
d0757229 3051 return '%s %s' % (extractor.lower(), video_id)
5db07df6 PH 3052
3053 def in_download_archive(self, info_dict):
3054 fn = self.params.get('download_archive')
3055 if fn is None:
3056 return False
3057
3058 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3059 if not vid_id:
7012b23c 3060 return False # Incomplete video information
5db07df6 3061
a45e8619 3062 return vid_id in self.archive
c1c9a79c PH 3063
3064 def record_download_archive(self, info_dict):
3065 fn = self.params.get('download_archive')
3066 if fn is None:
3067 return
5db07df6 PH 3068 vid_id = self._make_archive_id(info_dict)
3069 assert vid_id
c1c9a79c 3070 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3071 archive_file.write(vid_id + '\n')
a45e8619 3072 self.archive.add(vid_id)
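# Illustrative sketch (hypothetical entries): the download archive is a plain text
# file with one '<extractor> <id>' entry per line, e.g.
#
#     youtube BaW_jenozKc
#     vimeo 56015672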
dd82ffea 3073
8c51aa65 3074 @staticmethod
8abeeb94 3075 def format_resolution(format, default='unknown'):
a903d828 3076 is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
9359f3d4 3077 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3078 return 'audio only'
f49d89ee PH 3079 if format.get('resolution') is not None:
3080 return format['resolution']
35615307 DA 3081 if format.get('width') and format.get('height'):
3082 res = '%dx%d' % (format['width'], format['height'])
3083 elif format.get('height'):
3084 res = '%sp' % format['height']
3085 elif format.get('width'):
388ae76b 3086 res = '%dx?' % format['width']
a903d828 3087 elif is_images:
3088 return 'images'
8c51aa65 3089 else:
a903d828 3090 return default
3091 return f'{res} images' if is_images else res
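# Illustrative sketch (hypothetical format dicts): format_resolution() output
# for a few inputs, following the branches above.
#
#     YoutubeDL.format_resolution({'vcodec': 'none', 'acodec': 'mp4a.40.2'})  # 'audio only'
#     YoutubeDL.format_resolution({'width': 1920, 'height': 1080})            # '1920x1080'
#     YoutubeDL.format_resolution({'height': 720})                            # '720p'
#     YoutubeDL.format_resolution({'vcodec': 'none', 'acodec': 'none'})       # 'images'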
8c51aa65 3092
c57f7757 PH 3093 def _format_note(self, fdict):
3094 res = ''
3095 if fdict.get('ext') in ['f4f', 'f4m']:
3096 res += '(unsupported) '
32f90364 PH 3097 if fdict.get('language'):
3098 if res:
3099 res += ' '
9016d76f 3100 res += '[%s] ' % fdict['language']
c57f7757 PH 3101 if fdict.get('format_note') is not None:
3102 res += fdict['format_note'] + ' '
3103 if fdict.get('tbr') is not None:
3104 res += '%4dk ' % fdict['tbr']
3105 if fdict.get('container') is not None:
3106 if res:
3107 res += ', '
3108 res += '%s container' % fdict['container']
3089bc74 S 3109 if (fdict.get('vcodec') is not None
3110 and fdict.get('vcodec') != 'none'):
c57f7757 PH 3111 if res:
3112 res += ', '
3113 res += fdict['vcodec']
91c7271a 3114 if fdict.get('vbr') is not None:
c57f7757 PH 3115 res += '@'
3116 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3117 res += 'video@'
3118 if fdict.get('vbr') is not None:
3119 res += '%4dk' % fdict['vbr']
fbb21cf5 3120 if fdict.get('fps') is not None:
5d583bdf S 3121 if res:
3122 res += ', '
3123 res += '%sfps' % fdict['fps']
c57f7757 PH 3124 if fdict.get('acodec') is not None:
3125 if res:
3126 res += ', '
3127 if fdict['acodec'] == 'none':
3128 res += 'video only'
3129 else:
3130 res += '%-5s' % fdict['acodec']
3131 elif fdict.get('abr') is not None:
3132 if res:
3133 res += ', '
3134 res += 'audio'
3135 if fdict.get('abr') is not None:
3136 res += '@%3dk' % fdict['abr']
3137 if fdict.get('asr') is not None:
3138 res += ' (%5dHz)' % fdict['asr']
3139 if fdict.get('filesize') is not None:
3140 if res:
3141 res += ', '
3142 res += format_bytes(fdict['filesize'])
9732d77e PH 3143 elif fdict.get('filesize_approx') is not None:
3144 if res:
3145 res += ', '
3146 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3147 return res
91c7271a 3148
c57f7757 3149 def list_formats(self, info_dict):
94badb25 3150 formats = info_dict.get('formats', [info_dict])
53ed7066 3151 new_format = (
3152 'list-formats' not in self.params.get('compat_opts', [])
169dbde9 3153 and self.params.get('listformats_table', True) is not False)
76d321f6 3154 if new_format:
3155 table = [
3156 [
3157 format_field(f, 'format_id'),
3158 format_field(f, 'ext'),
3159 self.format_resolution(f),
3160 format_field(f, 'fps', '%d'),
3161 '|',
3162 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3163 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3164 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
76d321f6 3165 '|',
3166 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3167 format_field(f, 'vbr', '%4dk'),
3168 format_field(f, 'acodec', default='unknown').replace('none', ''),
3169 format_field(f, 'abr', '%3dk'),
3170 format_field(f, 'asr', '%5dHz'),
3f698246 3171 ', '.join(filter(None, (
3172 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3173 format_field(f, 'language', '[%s]'),
3174 format_field(f, 'format_note'),
3175 format_field(f, 'container', ignore=(None, f.get('ext'))),
ea05b302 3176 ))),
3f698246 3177 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
76d321f6 3178 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3f698246 3179 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
76d321f6 3180 else:
3181 table = [
3182 [
3183 format_field(f, 'format_id'),
3184 format_field(f, 'ext'),
3185 self.format_resolution(f),
3186 self._format_note(f)]
3187 for f in formats
3188 if f.get('preference') is None or f['preference'] >= -1000]
3189 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3190
cfb56d1a 3191 self.to_screen(
169dbde9 3192 '[info] Available formats for %s:' % info_dict['id'])
3193 self.to_stdout(render_table(
bc97cdae 3194 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a PH 3195
3196 def list_thumbnails(self, info_dict):
b0249bca 3197 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3198 if not thumbnails:
b7b72db9 3199 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3200 return
cfb56d1a PH 3201
3202 self.to_screen(
3203 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3204 self.to_stdout(render_table(
cfb56d1a PH 3205 ['ID', 'width', 'height', 'URL'],
3206 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3207
360e1ca5 3208 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3209 if not subtitles:
360e1ca5 3210 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3211 return
a504ced0 3212 self.to_screen(
edab9dbf 3213 'Available %s for %s:' % (name, video_id))
2412044c 3214
3215 def _row(lang, formats):
49c258e1 3216 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3217 if len(set(names)) == 1:
7aee40c1 3218 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3219 return [lang, ', '.join(names), ', '.join(exts)]
3220
169dbde9 3221 self.to_stdout(render_table(
2412044c 3222 ['Language', 'Name', 'Formats'],
3223 [_row(lang, formats) for lang, formats in subtitles.items()],
3224 hideEmpty=True))
a504ced0 3225
dca08720 PH 3226 def urlopen(self, req):
3227 """ Start an HTTP download """
82d8a8b6 3228 if isinstance(req, compat_basestring):
67dda517 3229 req = sanitized_Request(req)
19a41fc6 3230 return self._opener.open(req, timeout=self._socket_timeout)
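# Illustrative sketch (example proxy and URL): urlopen() accepts a URL string or a
# prepared Request and goes through the opener built by _setup_opener() below
# (proxies, cookies, custom handlers).
#
#     from yt_dlp import YoutubeDL
#
#     with YoutubeDL({'proxy': 'socks5://127.0.0.1:9050'}) as ydl:
#         page = ydl.urlopen('https://example.com/robots.txt').read()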
dca08720 PH 3231
3232 def print_debug_header(self):
3233 if not self.params.get('verbose'):
3234 return
62fec3b2 3235
c6afed48 PH 3236 stdout_encoding = getattr(
3237 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3238 encoding_str = (
734f90bb PH 3239 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3240 locale.getpreferredencoding(),
3241 sys.getfilesystemencoding(),
c6afed48 3242 stdout_encoding,
b0472057 3243 self.get_encoding()))
4192b51c 3244 write_string(encoding_str, encoding=None)
734f90bb 3245
4c88ff87 3246 source = detect_variant()
3247 self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
e0986e31 3248 if _LAZY_LOADER:
f74980cb 3249 self._write_string('[debug] Lazy loading extractors enabled\n')
3ae5e797 3250 if plugin_extractors or plugin_postprocessors:
3251 self._write_string('[debug] Plugins: %s\n' % [
3252 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3253 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3254 if self.params.get('compat_opts'):
3255 self._write_string(
3256 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720 PH 3257 try:
3258 sp = subprocess.Popen(
3259 ['git', 'rev-parse', '--short', 'HEAD'],
3260 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3261 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3262 out, err = process_communicate_or_kill(sp)
dca08720 PH 3263 out = out.decode().strip()
3264 if re.match('[0-9a-f]+', out):
f74980cb 3265 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3266 except Exception:
dca08720 PH 3267 try:
3268 sys.exc_clear()
70a1165b 3269 except Exception:
dca08720 3270 pass
b300cda4 S 3271
3272 def python_implementation():
3273 impl_name = platform.python_implementation()
3274 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3275 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3276 return impl_name
3277
e5813e53 3278 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3279 platform.python_version(),
3280 python_implementation(),
3281 platform.architecture()[0],
b300cda4 3282 platform_name()))
d28b5171 3283
73fac4e9 3284 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3285 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3286 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3287 exe_str = ', '.join(
2831b468 3288 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3289 ) or 'none'
d28b5171 3290 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720 3291
2831b468 3292 from .downloader.websocket import has_websockets
3293 from .postprocessor.embedthumbnail import has_mutagen
3294 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3295
ad3dc496 3296 lib_str = ', '.join(sorted(filter(None, (
edf65256 3297 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
2831b468 3298 has_websockets and 'websockets',
3299 has_mutagen and 'mutagen',
3300 SQLITE_AVAILABLE and 'sqlite',
3301 KEYRING_AVAILABLE and 'keyring',
ad3dc496 3302 )))) or 'none'
2831b468 3303 self._write_string('[debug] Optional libraries: %s\n' % lib_str)
d1d5c08f 3304 self._write_string('[debug] ANSI escape support: stdout = %s, stderr = %s\n' % (
3305 supports_terminal_sequences(self._screen_file),
3306 supports_terminal_sequences(self._err_file)))
2831b468 3307
dca08720 PH 3308 proxy_map = {}
3309 for handler in self._opener.handlers:
3310 if hasattr(handler, 'proxies'):
3311 proxy_map.update(handler.proxies)
734f90bb 3312 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3313
58b1f00d PH 3314 if self.params.get('call_home', False):
3315 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3316 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3317 return
58b1f00d PH 3318 latest_version = self.urlopen(
3319 'https://yt-dl.org/latest/version').read().decode('utf-8')
3320 if version_tuple(latest_version) > version_tuple(__version__):
3321 self.report_warning(
3322 'You are using an outdated version (newest version: %s)! '
3323 'See https://yt-dl.org/update if you need help updating.' %
3324 latest_version)
3325
e344693b 3326 def _setup_opener(self):
6ad14cab 3327 timeout_val = self.params.get('socket_timeout')
19a41fc6 3328 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3329
982ee69a 3330 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720 PH 3331 opts_cookiefile = self.params.get('cookiefile')
3332 opts_proxy = self.params.get('proxy')
3333
982ee69a 3334 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3335
6a3f4c3f 3336 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720 PH 3337 if opts_proxy is not None:
3338 if opts_proxy == '':
3339 proxies = {}
3340 else:
3341 proxies = {'http': opts_proxy, 'https': opts_proxy}
3342 else:
3343 proxies = compat_urllib_request.getproxies()
067aa17e 3344 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720 PH 3345 if 'http' in proxies and 'https' not in proxies:
3346 proxies['https'] = proxies['http']
91410c9b 3347 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2 PH 3348
3349 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d PH 3350 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3351 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3352 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3353 data_handler = compat_urllib_request_DataHandler()
6240b0a2 JMF 3354
3355 # When passing our own FileHandler instance, build_opener won't add the
3356 # default FileHandler, which lets us disable the file protocol, since it
3357 # can be used for malicious purposes (see
067aa17e 3358 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2 JMF 3359 file_handler = compat_urllib_request.FileHandler()
3360
3361 def file_open(*args, **kwargs):
7a5c1cfe 3362 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2 JMF 3363 file_handler.file_open = file_open
3364
3365 opener = compat_urllib_request.build_opener(
fca6dba8 3366 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3367
dca08720 PH 3368 # Delete the default user-agent header, which would otherwise apply in
3369 # cases where our custom HTTP handler doesn't come into play
067aa17e 3370 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3371 opener.addheaders = []
3372 self._opener = opener
62fec3b2 PH 3373
3374 def encode(self, s):
3375 if isinstance(s, bytes):
3376 return s # Already encoded
3377
3378 try:
3379 return s.encode(self.get_encoding())
3380 except UnicodeEncodeError as err:
3381 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3382 raise
3383
3384 def get_encoding(self):
3385 encoding = self.params.get('encoding')
3386 if encoding is None:
3387 encoding = preferredencoding()
3388 return encoding
ec82d85a 3389
80c03fa9 3390 def _write_info_json(self, label, ie_result, infofn):
3391 ''' Write infojson and return True = written, False = skipped, None = error '''
3392 if not self.params.get('writeinfojson'):
3393 return False
3394 elif not infofn:
3395 self.write_debug(f'Skipping writing {label} infojson')
3396 return False
3397 elif not self._ensure_dir_exists(infofn):
3398 return None
3399 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3400 self.to_screen(f'[info] {label.title()} metadata is already present')
3401 else:
3402 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3403 try:
3404 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3405 except (OSError, IOError):
3406 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3407 return None
3408 return True
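# Illustrative sketch: how callers interpret the tri-state return value
# (compare the call site in process_info() above).
#
#     written = self._write_info_json('video', info_dict, infofn)
#     if written is None:       # hard error -> abort processing this video
#         return
#     elif written:             # written now, or an existing file was kept
#         info_dict['__infojson_filename'] = infofn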
3409
3410 def _write_description(self, label, ie_result, descfn):
3411 ''' Write description and return True = written, False = skipped, None = error '''
3412 if not self.params.get('writedescription'):
3413 return False
3414 elif not descfn:
3415 self.write_debug(f'Skipping writing {label} description')
3416 return False
3417 elif not self._ensure_dir_exists(descfn):
3418 return None
3419 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3420 self.to_screen(f'[info] {label.title()} description is already present')
3421 elif ie_result.get('description') is None:
3422 self.report_warning(f'There\'s no {label} description to write')
3423 return False
3424 else:
3425 try:
3426 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3427 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3428 descfile.write(ie_result['description'])
3429 except (OSError, IOError):
3430 self.report_error(f'Cannot write {label} description file {descfn}')
3431 return None
3432 return True
3433
3434 def _write_subtitles(self, info_dict, filename):
3435 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3436 ret = []
3437 subtitles = info_dict.get('requested_subtitles')
3438 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3439 # Subtitle download errors are already reported by the relevant IE,
3440 # so this silently continues when used with an IE that doesn't support subtitles
3441 return ret
3442
3443 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3444 if not sub_filename_base:
3445 self.to_screen('[info] Skipping writing video subtitles')
3446 return ret
3447 for sub_lang, sub_info in subtitles.items():
3448 sub_format = sub_info['ext']
3449 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3450 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3451 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3452 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3453 sub_info['filepath'] = sub_filename
3454 ret.append((sub_filename, sub_filename_final))
3455 continue
3456
3457 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3458 if sub_info.get('data') is not None:
3459 try:
3460 # Use newline='' to prevent conversion of newline characters
3461 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3462 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3463 subfile.write(sub_info['data'])
3464 sub_info['filepath'] = sub_filename
3465 ret.append((sub_filename, sub_filename_final))
3466 continue
3467 except (OSError, IOError):
3468 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3469 return None
3470
3471 try:
3472 sub_copy = sub_info.copy()
3473 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3474 self.dl(sub_filename, sub_copy, subtitle=True)
3475 sub_info['filepath'] = sub_filename
3476 ret.append((sub_filename, sub_filename_final))
3477 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3478 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3479 continue
519804a9 3480 return ret
80c03fa9 3481
3482 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3483 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3484 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3485 thumbnails, ret = [], []
6c4fd172 3486 if write_all or self.params.get('writethumbnail', False):
0202b52a 3487 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3488 multiple = write_all and len(thumbnails) > 1
ec82d85a 3489
80c03fa9 3490 if thumb_filename_base is None:
3491 thumb_filename_base = filename
3492 if thumbnails and not thumb_filename_base:
3493 self.write_debug(f'Skipping writing {label} thumbnail')
3494 return ret
3495
981052c9 3496 for t in thumbnails[::-1]:
80c03fa9 3497 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3498 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3499 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3500 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3501
80c03fa9 3502 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3503 ret.append((thumb_filename, thumb_filename_final))
8ba87148 3504 t['filepath'] = thumb_filename
80c03fa9 3505 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
ec82d85a 3506 else:
80c03fa9 3507 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a PH 3508 try:
3509 uf = self.urlopen(t['url'])
80c03fa9 3510 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3511 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3512 shutil.copyfileobj(uf, thumbf)
80c03fa9 3513 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3514 t['filepath'] = thumb_filename
3158150c 3515 except network_exceptions as err:
80c03fa9 3516 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3517 if ret and not write_all:
3518 break
0202b52a 3519 return ret
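# Illustrative sketch (example values): the params that drive the _write_* helpers
# above when embedding yt-dlp. The option names are the real YoutubeDL params used
# in this file; the values and URL are examples.
#
#     ydl_opts = {
#         'writeinfojson': True,          # -> _write_info_json()
#         'writedescription': True,       # -> _write_description()
#         'writesubtitles': True,         # -> _write_subtitles()
#         'writethumbnail': True,         # -> _write_thumbnails() (best thumbnail only)
#         'write_all_thumbnails': False,  # set True to write every thumbnail
#     }
#     from yt_dlp import YoutubeDL
#     with YoutubeDL(ydl_opts) as ydl:
#         ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])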