]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
Fix `--flat-playlist` when entry has no `ie_key`
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474 29from string import ascii_letters
e5813e53 30from zipimport import zipimporter
961ea474 31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
dca08720 34 compat_cookiejar,
003c69a8 35 compat_get_terminal_size,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
732044af 54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 57 DownloadError,
c0384f22 58 encode_compat_str,
ce02ed60 59 encodeFilename,
498f5606 60 EntryNotInPlaylist,
a06916d9 61 error_to_compat_str,
8b0d7497 62 ExistingVideoReached,
590bc6f6 63 expand_path,
ce02ed60 64 ExtractorError,
e29663c6 65 float_or_none,
02dbf93f 66 format_bytes,
76d321f6 67 format_field,
752cda38 68 STR_FORMAT_RE,
525ef922 69 formatSeconds,
773f291d 70 GeoRestrictedError,
b0249bca 71 HEADRequest,
c9969434 72 int_or_none,
732044af 73 iri_to_uri,
773f291d 74 ISO3166Utils,
56a8fb4f 75 LazyList,
ce02ed60 76 locked_file,
0202b52a 77 make_dir,
dca08720 78 make_HTTPS_handler,
ce02ed60 79 MaxDownloadsReached,
3158150c 80 network_exceptions,
cd6fc19e 81 orderedSet,
a06916d9 82 OUTTMPL_TYPES,
b7ab0590 83 PagedList,
083c9df9 84 parse_filesize,
91410c9b 85 PerRequestProxyHandler,
dca08720 86 platform_name,
eedb7ba5 87 PostProcessingError,
ce02ed60 88 preferredencoding,
eedb7ba5 89 prepend_extension,
a06916d9 90 process_communicate_or_kill,
51fb4995 91 register_socks_protocols,
a06916d9 92 RejectedVideoReached,
cfb56d1a 93 render_table,
eedb7ba5 94 replace_extension,
ce02ed60
PH
95 SameFileError,
96 sanitize_filename,
1bb5c511 97 sanitize_path,
dcf77cf1 98 sanitize_url,
67dda517 99 sanitized_Request,
e5660ee6 100 std_headers,
1211bb6d 101 str_or_none,
e29663c6 102 strftime_or_none,
ce02ed60 103 subtitles_filename,
51d9739f 104 ThrottledDownload,
732044af 105 to_high_limit_path,
324ad820 106 traverse_obj,
6033d980 107 try_get,
ce02ed60 108 UnavailableVideoError,
29eb5174 109 url_basename,
58b1f00d 110 version_tuple,
ce02ed60
PH
111 write_json_file,
112 write_string,
1bab3437 113 YoutubeDLCookieJar,
6a3f4c3f 114 YoutubeDLCookieProcessor,
dca08720 115 YoutubeDLHandler,
fca6dba8 116 YoutubeDLRedirectHandler,
ce02ed60 117)
a0e07d31 118from .cache import Cache
52a8a1e1 119from .extractor import (
120 gen_extractor_classes,
121 get_info_extractor,
122 _LAZY_LOADER,
123 _PLUGIN_CLASSES
124)
4c54b89e 125from .extractor.openload import PhantomJSwrapper
52a8a1e1 126from .downloader import (
127 get_suitable_downloader,
128 shorten_protocol_name
129)
4c83c967 130from .downloader.rtmp import rtmpdump_version
4f026faf 131from .postprocessor import (
e36d50c5 132 get_postprocessor,
133 FFmpegFixupDurationPP,
f17f8651 134 FFmpegFixupM3u8PP,
62cd676c 135 FFmpegFixupM4aPP,
6271f1ca 136 FFmpegFixupStretchedPP,
e36d50c5 137 FFmpegFixupTimestampPP,
4f026faf
PH
138 FFmpegMergerPP,
139 FFmpegPostProcessor,
0202b52a 140 MoveFilesAfterDownloadPP,
4f026faf 141)
dca08720 142from .version import __version__
8222d8de 143
e9c0cdd3
YCH
144if compat_os_name == 'nt':
145 import ctypes
146
2459b6e1 147
8222d8de
JMF
148class YoutubeDL(object):
149 """YoutubeDL class.
150
151 YoutubeDL objects are the ones responsible for downloading the
152 actual video file and writing it to disk if the user has requested
153 it, among some other tasks. In most cases there should be one per
154 program. As, given a video URL, the downloader doesn't know how to
155 extract all the needed information, task that InfoExtractors do, it
156 has to pass the URL to one of them.
157
158 For this, YoutubeDL objects have a method that allows
159 InfoExtractors to be registered in a given order. When it is passed
160 a URL, the YoutubeDL object hands it to the first InfoExtractor it
161 finds that reports being able to handle it. The InfoExtractor extracts
162 all the information about the video or videos the URL refers to, and
163 YoutubeDL processes the extracted information, possibly using a File
164 Downloader to download the video.
165
166 YoutubeDL objects accept a lot of parameters. In order not to saturate
167 the object constructor with arguments, it receives a dictionary of
168 options instead. These options are available through the params
169 attribute for the InfoExtractors to use. The YoutubeDL also
170 registers itself as the downloader in charge for the InfoExtractors
171 that are added to it, so this is a "mutual registration".
172
173 Available options:
174
175 username: Username for authentication purposes.
176 password: Password for authentication purposes.
180940e0 177 videopassword: Password for accessing a video.
1da50aa3
S
178 ap_mso: Adobe Pass multiple-system operator identifier.
179 ap_username: Multiple-system operator account username.
180 ap_password: Multiple-system operator account password.
8222d8de
JMF
181 usenetrc: Use netrc for authentication instead.
182 verbose: Print additional info to stdout.
183 quiet: Do not print messages to stdout.
ad8915b7 184 no_warnings: Do not print out anything for warnings.
53c18592 185 forceprint: A list of templates to force print
186 forceurl: Force printing final URL. (Deprecated)
187 forcetitle: Force printing title. (Deprecated)
188 forceid: Force printing ID. (Deprecated)
189 forcethumbnail: Force printing thumbnail URL. (Deprecated)
190 forcedescription: Force printing description. (Deprecated)
191 forcefilename: Force printing final filename. (Deprecated)
192 forceduration: Force printing duration. (Deprecated)
8694c600 193 forcejson: Force printing info_dict as JSON.
63e0be34
PH
194 dump_single_json: Force printing the info_dict of the whole playlist
195 (or video) as a single JSON line.
c25228e5 196 force_write_download_archive: Force writing download archive regardless
197 of 'skip_download' or 'simulate'.
8222d8de 198 simulate: Do not download the video files.
eb8a4433 199 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 200 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 201 ignore_no_formats_error: Ignore "No video formats" error. Useful for
202 extracting metadata even if the video is not actually
203 available for download (experimental)
c25228e5 204 format_sort: How to sort the video formats. see "Sorting Formats"
205 for more details.
206 format_sort_force: Force the given format_sort. see "Sorting Formats"
207 for more details.
208 allow_multiple_video_streams: Allow multiple video streams to be merged
209 into a single file
210 allow_multiple_audio_streams: Allow multiple audio streams to be merged
211 into a single file
4524baf0 212 paths: Dictionary of output paths. The allowed keys are 'home'
213 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 214 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 215 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
216 A string is also accepted for backward compatibility
a820dc72
RA
217 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
218 restrictfilenames: Do not allow "&" and spaces in file names
219 trim_file_name: Limit length of filename (extension excluded)
4524baf0 220 windowsfilenames: Force the filenames to be windows compatible
a820dc72 221 ignoreerrors: Do not stop on download errors
7a5c1cfe 222 (Default True when running yt-dlp,
a820dc72 223 but False when directly accessing YoutubeDL class)
26e2805c 224 skip_playlist_after_errors: Number of allowed failures until the rest of
225 the playlist is skipped
d22dec74 226 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 227 overwrites: Overwrite all video and metadata files if True,
228 overwrite only non-video files if None
229 and don't overwrite any file if False
8222d8de
JMF
230 playliststart: Playlist item to start at.
231 playlistend: Playlist item to end at.
c14e88f0 232 playlist_items: Specific indices of playlist to download.
ff815fe6 233 playlistreverse: Download playlist items in reverse order.
75822ca7 234 playlistrandom: Download playlist items in random order.
8222d8de
JMF
235 matchtitle: Download only matching titles.
236 rejecttitle: Reject downloads for matching titles.
8bf9319e 237 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
238 logtostderr: Log messages to stderr instead of stdout.
239 writedescription: Write the video description to a .description file
240 writeinfojson: Write the video description to a .info.json file
75d43ca0 241 clean_infojson: Remove private fields from the infojson
06167fbb 242 writecomments: Extract video comments. This will not be written to disk
243 unless writeinfojson is also given
1fb07d10 244 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 245 writethumbnail: Write the thumbnail image to a file
c25228e5 246 allow_playlist_files: Whether to write playlists' description, infojson etc
247 also to disk when using the 'write*' options
ec82d85a 248 write_all_thumbnails: Write all thumbnail formats to files
732044af 249 writelink: Write an internet shortcut file, depending on the
250 current platform (.url/.webloc/.desktop)
251 writeurllink: Write a Windows internet shortcut file (.url)
252 writewebloclink: Write a macOS internet shortcut file (.webloc)
253 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 254 writesubtitles: Write the video subtitles to a file
741dd8ea 255 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 256 allsubtitles: Deprecated - Use subtitlelangs = ['all']
257 Downloads all the subtitles of the video
0b7f3118 258 (requires writesubtitles or writeautomaticsub)
8222d8de 259 listsubtitles: Lists all available subtitles for the video
a504ced0 260 subtitlesformat: The format code for subtitles
c32b0aab 261 subtitleslangs: List of languages of the subtitles to download (can be regex).
262 The list may contain "all" to refer to all the available
263 subtitles. The language can be prefixed with a "-" to
264 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
265 keepvideo: Keep the video file after post-processing
266 daterange: A DateRange object, download only if the upload_date is in the range.
267 skip_download: Skip the actual download of the video file
c35f9e72 268 cachedir: Location of the cache files in the filesystem.
a0e07d31 269 False to disable filesystem cache.
47192f92 270 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
271 age_limit: An integer representing the user's age in years.
272 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
273 min_views: An integer representing the minimum view count the video
274 must have in order to not be skipped.
275 Videos without view count information are always
276 downloaded. None for no limit.
277 max_views: An integer representing the maximum view count.
278 Videos that are more popular than that are not
279 downloaded.
280 Videos without view count information are always
281 downloaded. None for no limit.
282 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
283 Videos already present in the file are not downloaded
284 again.
8a51f564 285 break_on_existing: Stop the download process after attempting to download a
286 file that is in the archive.
287 break_on_reject: Stop the download process when encountering a video that
288 has been filtered out.
289 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 290 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
291 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
292 At the moment, this is only supported by YouTube.
a1ee09e8 293 proxy: URL of the proxy server to use
38cce791 294 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 295 on geo-restricted sites.
e344693b 296 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
297 bidi_workaround: Work around buggy terminals without bidirectional text
298 support, using fribidi
a0ddb8a2 299 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 300 include_ads: Download ads as well
04b4d394
PH
301 default_search: Prepend this string if an input url is not valid.
302 'auto' for elaborate guessing
62fec3b2 303 encoding: Use this encoding instead of the system-specified.
e8ee972c 304 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
305 Pass in 'in_playlist' to only show this behavior for
306 playlist items.
4f026faf 307 postprocessors: A list of dictionaries, each with an entry
71b640cc 308 * key: The name of the postprocessor. See
7a5c1cfe 309 yt_dlp/postprocessor/__init__.py for a list.
56d868db 310 * when: When to run the postprocessor. Can be one of
311 pre_process|before_dl|post_process|after_move.
312 Assumed to be 'post_process' if not given
ab8e5e51
AM
313 post_hooks: A list of functions that get called as the final step
314 for each video file, after all postprocessors have been
315 called. The filename will be passed as the only argument.
71b640cc
PH
316 progress_hooks: A list of functions that get called on download
317 progress, with a dictionary with the entries
5cda4eda 318 * status: One of "downloading", "error", or "finished".
ee69b99a 319 Check this first and ignore unknown values.
71b640cc 320
5cda4eda 321 If status is one of "downloading", or "finished", the
ee69b99a
PH
322 following properties may also be present:
323 * filename: The final filename (always present)
5cda4eda 324 * tmpfilename: The filename we're currently writing to
71b640cc
PH
325 * downloaded_bytes: Bytes on disk
326 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
327 * total_bytes_estimate: Guess of the eventual file size,
328 None if unavailable.
329 * elapsed: The number of seconds since download started.
71b640cc
PH
330 * eta: The estimated time in seconds, None if unknown
331 * speed: The download speed in bytes/second, None if
332 unknown
5cda4eda
PH
333 * fragment_index: The counter of the currently
334 downloaded video fragment.
335 * fragment_count: The number of fragments (= individual
336 files that will be merged)
71b640cc
PH
337
338 Progress hooks are guaranteed to be called at least once
339 (with status "finished") if the download is successful.
45598f15 340 merge_output_format: Extension to use when merging formats.
6b591b29 341 final_ext: Expected final extension; used to detect when the file was
342 already downloaded and converted. "merge_output_format" is
343 replaced by this extension when given
6271f1ca
PH
344 fixup: Automatically correct known faults of the file.
345 One of:
346 - "never": do nothing
347 - "warn": only emit a warning
348 - "detect_or_warn": check whether we can do anything
62cd676c 349 about it, warn otherwise (default)
504f20dd 350 source_address: Client-side IP address to bind to.
6ec6cb4e 351 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 352 yt-dlp servers for debugging. (BROKEN)
1cf376f5 353 sleep_interval_requests: Number of seconds to sleep between requests
354 during extraction
7aa589a5
S
355 sleep_interval: Number of seconds to sleep before each download when
356 used alone or a lower bound of a range for randomized
357 sleep before each download (minimum possible number
358 of seconds to sleep) when used along with
359 max_sleep_interval.
360 max_sleep_interval:Upper bound of a range for randomized sleep before each
361 download (maximum possible number of seconds to sleep).
362 Must only be used along with sleep_interval.
363 Actual sleep time will be a random float from range
364 [sleep_interval; max_sleep_interval].
1cf376f5 365 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
366 listformats: Print an overview of available video formats and exit.
367 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
368 match_filter: A function that gets called with the info_dict of
369 every video.
370 If it returns a message, the video is ignored.
371 If it returns None, the video is downloaded.
372 match_filter_func in utils.py is one example for this.
7e5db8c9 373 no_color: Do not emit color codes in output.
0a840f58 374 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 375 HTTP header
0a840f58 376 geo_bypass_country:
773f291d
S
377 Two-letter ISO 3166-1 alpha-2 country code that will be used for
378 explicit geographic restriction bypassing via faking
504f20dd 379 X-Forwarded-For HTTP header
5f95927a
S
380 geo_bypass_ip_block:
381 IP range in CIDR notation that will be used similarly to
504f20dd 382 geo_bypass_country
71b640cc 383
85729c51 384 The following options determine which downloader is picked:
52a8a1e1 385 external_downloader: A dictionary of protocol keys and the executable of the
386 external downloader to use for it. The allowed protocols
387 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
388 Set the value to 'native' to use the native downloader
389 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
390 or {'m3u8': 'ffmpeg'} instead.
391 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
392 if True, otherwise use ffmpeg/avconv if False, otherwise
393 use downloader suggested by extractor if None.
53ed7066 394 compat_opts: Compatibility options. See "Differences in default behavior".
18e674b4 395 Note that only format-sort, format-spec, no-live-chat,
396 no-attach-info-json, playlist-index, list-formats,
e858a9d6 397 no-direct-merge, embed-thumbnail-atomicparsley,
398 no-youtube-unavailable-videos, no-youtube-channel-redirect,
399 works when used via the API
fe7e0c98 400
8222d8de 401 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 402 the downloader (see yt_dlp/downloader/common.py):
51d9739f 403 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
404 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
405 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67
JMF
406
407 The following options are used by the post processors:
d4a24f40 408 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 409 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
410 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
411 to the binary or its containing directory.
43820c03 412 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
413 and a list of additional command-line arguments for the
414 postprocessor/executable. The dict can also have "PP+EXE" keys
415 which are used when the given exe is used by the given PP.
416 Use 'default' as the name for arguments to be passed to all PP
e409895f 417
418 The following options are used by the extractors:
62bff2c1 419 extractor_retries: Number of times to retry for known errors
420 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 421 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 422 discontinuities such as ad breaks (default: False)
3600fd59 423 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about DASH. (only for youtube)
e409895f 427 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 428 data will be downloaded and processed by extractor.
429 You can reduce network I/O by disabling it if you don't
430 care about HLS. (only for youtube)
8222d8de
JMF
431 """
432
c9969434
S
# Names of the fields this class treats as numeric.
# NOTE(review): presumably consulted when formatting output templates - confirm against usage.
_NUMERIC_FIELDS = {
    # Stream/format properties
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    # Dates
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # Counters and ratings
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # Positions
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
}
443
8222d8de
JMF
# Class-level defaults. __init__ rebinds params, _ies, _pps,
# __prepare_filename_warned, _first_webpage_request, _download_retcode,
# _num_downloads and _screen_file on every instance.
params = None
_ies = []
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
__prepare_filename_warned = False
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
# NOTE(review): this mutable set is not rebound in the __init__ visible here,
# so it is shared by all instances unless it is reassigned elsewhere.
_playlist_urls = set()
_screen_file = None
454
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:     Dictionary of options (see the class docstring). An empty
                dictionary is used when None is given.
    auto_init:  If true, print the debug header and register the default
                info extractors.
    """
    if params is None:
        params = {}
    # Per-instance state; these rebind the class-level defaults
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # The boolean 'logtostderr' is used as a list index: False -> stdout, True -> stderr
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    # User-supplied options override the defaults above
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        # Warn if the deprecated `param` was given; return whether it was
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is set explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Emit any warnings passed in through the 'warnings' param
    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    # 'final_ext' takes precedence over 'merge_output_format'
    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None for 'overwrites' is treated the same as the key being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            # The helper reads messages on its stdin and writes to the pty slave;
            # the converted text is read back from the master end
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; other OS errors propagate
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    """Preload the archive, if any is specified"""
    def preload_download_archive(fn):
        # Read every line of the archive file `fn` into self.archive.
        # Returns False when no file name is given or the file does not exist,
        # True once the file has been read.
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is re-raised
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Instantiate and register the postprocessors described in the params
    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's dictionary is not modified by pop()
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        # The remaining entries are passed to the postprocessor as keyword arguments
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
588
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a YouTube ID starting with a dash.

    Such an argument is a dash followed by exactly ten ID characters. When
    any is found, a warning is reported suggesting a command line in which
    these arguments are moved after a "--" separator.
    """
    dashed_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects = [pos for pos, arg in enumerate(argv) if dashed_id.match(arg)]
    if not suspects:
        return

    kept = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    suggestion = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggestion))
604
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the end of the list.

    An instance is additionally recorded under its ie_key() and has this
    object set as its downloader; a class is only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # Classes are not instantiated here, so there is nothing to register
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 611
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key. If there is
    none, a new instance is created, added to the extractor list and
    returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
623
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every extractor class produced by gen_extractor_classes() to the end of the list
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
630
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain named by `when` and make this object its downloader."""
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
635
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
639
933605d7
JMF
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of progress hooks (currently only for the file downloader)"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 643
def _bidi_workaround(self, message):
    """Pass `message` through the bidi helper process, if one was set up.

    Without an `_output_channel` attribute the message is returned
    unchanged. Otherwise the message is written to the helper's stdin and
    one line per line of the message is read back from the channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    # Strip the final character, i.e. the newline appended before sending
    return ''.join(pieces)[:-1]
1c088fa8 656
def _write_string(self, s, out=None):
    """Write `s` to `out` through write_string(), using the 'encoding' param."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 659
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    With a 'logger' param, the message goes to logger.debug() instead.
    A quiet message is written only in verbose mode, and then to the
    error file rather than the screen file.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    destination = self._err_file if quiet else self._screen_file
    self._write_string(text, destination)
8222d8de
JMF
668
def to_stderr(self, message):
    """Print message to stderr

    With a 'logger' param, the message goes to logger.error() instead.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
        return
    logger.error(message)
8222d8de 676
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to `message` when the 'consoletitle' param is set."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        # Escape sequence that sets the title of the terminal window
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 687
bdde425c
PH
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Nothing is done unless 'consoletitle' is set, 'simulate' is not set,
    the OS is not 'nt' and the TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
696
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Nothing is done unless 'consoletitle' is set, 'simulate' is not set,
    the OS is not 'nt' and the TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
705
def __enter__(self):
    """Enter the context manager: save the console title and return self."""
    self.save_console_title()
    return self
709
def __exit__(self, *args):
    """Leave the context: restore the console title and persist cookies.

    The cookie jar is saved (including discardable and expired cookies)
    only when a 'cookiefile' is configured. Exception details passed in
    args are ignored, so exceptions are not suppressed.
    """
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is not None:
        self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 715
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr. In verbose mode a
    traceback is printed as well: the given tb, or one built from the
    exception being handled, or the current call stack when no
    exception is active.

    Unless 'ignoreerrors' is set, DownloadError is raised afterwards;
    otherwise the download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    def wrapped_exc_info():
        # exc_info stored on the active exception, when it carries a populated one
        if (sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info')
                and sys.exc_info()[1].exc_info[0]):
            return sys.exc_info()[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # called from inside an except block
                pieces = []
                inner = wrapped_exc_info()
                if inner:
                    pieces.append(''.join(traceback.format_exception(*inner)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return
    raise DownloadError(message, wrapped_exc_info() or sys.exc_info())
746
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    be_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=be_quiet)
751
8222d8de
JMF
def report_warning(self, message):
    """Report a warning.

    With a logger configured, the message is passed to its warning()
    method. Otherwise, unless 'no_warnings' is set, the message is
    printed to stderr prefixed with 'WARNING:'; the prefix is colored
    when color is allowed, the error file is a tty and the OS is not
    Windows.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
768
def report_error(self, message, tb=None):
    """Prefix message with 'ERROR:' and pass it, with tb, on to trouble().

    The prefix is colored red when color is allowed, the error file is
    a tty and the OS is not Windows.
    """
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
780
def write_debug(self, message):
    """Emit a '[debug] '-prefixed message, but only in verbose mode.

    The message goes to the logger's debug() when a logger is
    configured, otherwise it is written out with a trailing newline.
    """
    if not self.params.get('verbose', False):
        return
    line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self._write_string('%s\n' % line)
790
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already downloaded in full.

    If printing the file name fails with UnicodeEncodeError, a generic
    message without the name is printed instead.
    """
    detailed = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 797
def report_file_delete(self, file_name):
    """Tell the user that the existing file_name is about to be deleted.

    If printing the file name fails with UnicodeEncodeError, a generic
    message without the name is printed instead.
    """
    detailed = 'Deleting existing file %s' % file_name
    try:
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
0c3d0f51 804
def parse_outtmpl(self):
    """Return the 'outtmpl' parameter as a dict with defaults filled in.

    A non-dict value is treated as the 'default' template. Every key of
    DEFAULT_OUTTMPL whose template is missing or empty is set to the
    default one. A warning is issued for each template given as bytes.
    Note that a dict passed in params is updated in place.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for name, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(name):
            templates[name] = fallback
    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
818
def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the dir_type path and filename.

    Paths come from the 'paths' parameter (a dict); each is stripped and
    passed through expand_path. The result is sanitized, forcing Windows
    rules when 'windowsfilenames' is set.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
833
@staticmethod
def validate_outtmpl(tmpl):
    """Check that tmpl can be %-formatted; return None or the ValueError raised.

    Matches of STR_FORMAT_RE that have no key get an extra '%' in front
    before the trial substitution against a defaultdict of ints.
    """
    def escape_keyless(mobj):
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    try:
        re.sub(STR_FORMAT_RE.format(''), escape_keyless, tmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
846
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """Make the template and info_dict suitable for substitution (outtmpl % info_dict).

    Arguments:
    outtmpl   -- the output template string
    info_dict -- the info dict; it is copied, not modified
    sanitize  -- optional callable(field_name, value) applied to string-like values

    Returns a tuple (template, values): the rewritten template, where every
    keyed field is renamed to '<key>\\0<format>', and the dict holding the
    value computed for each renamed key.
    """
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The alternation is wrapped in a group so that, when embedded after the
    # operator in INTERNAL_FORMAT_RE, it reads "op (field|num)" rather than
    # "(op field)|num"; the decimal point is escaped so only '.' is accepted.
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            math_func = None
            # Consume the expression alternately as operator, operand, operator, ...
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_func else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_func is None:
                    math_func = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a literal number: treat the operand as a field name
                    offset = float_or_none(get_key(item))
                try:
                    value = math_func(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_func = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default, mobj = None, na, {'fields': ''}
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        if fmt == 'c':
            value = compat_str(value)
            if value:
                value = value[0]
            else:
                # An empty string has no first character; fall back to the
                # default instead of raising IndexError
                value, fmt = default, 's'
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            if fmt[-1] in 'csr':
                value = sanitize(mobj['fields'].split('.')[-1], value)
        # The format is made part of the key so the same field can be
        # used with different formats in one template
        key += '\0%s' % fmt
        TMPL_DICT[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 967
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of tmpl_type against info_dict.

    Returns the resulting filename, or None (after reporting an error)
    when a ValueError is raised while building it.
    """
    def sanitize(k, v):
        return sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))

    try:
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choice(ascii_letters) for _ in range(32))
        protected = outtmpl.replace('%%', '%{0}%'.format(sep))
        protected = protected.replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(protected).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            fn_groups = filename.split('.')
            ext = fn_groups[-1]
            sub_ext = fn_groups[-2] if len(fn_groups) > 2 else ''
            parts = [fn_groups[0][:trim_file_name], sub_ext, ext]
            filename = '.'.join(part for part in parts if part)

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1008
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The template for dir_type (or 'default') is evaluated; '-' and empty
    results are returned unchanged, anything else is combined with the
    configured paths. With warn set, a one-time warning is issued when
    the 'paths' parameter cannot take effect.
    """
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if self.params.get('paths'):
            if filename == '-':
                self.report_warning('--paths is ignored when an outputting to stdout')
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True

    if not filename or filename == '-':
        return filename
    return self.get_output_path(dir_type, filename)
0202b52a 1026
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded, else the reason to skip it

    An entry already in the download archive, or rejected by the title,
    date, view-count, age or (unless incomplete) match_filter checks, is
    skipped. The reason is printed unless silent. If the corresponding
    'break_on_existing' / 'break_on_reject' parameter is set,
    ExistingVideoReached / RejectedVideoReached is raised instead of
    returning.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        if incomplete:
            return None
        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        # match_filter returns None to accept, or the reason for rejection
        return match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1080
b6c45014
JMF
1081 @staticmethod
1082 def add_extra_info(info_dict, extra_info):
1083 '''Set the keys from extra_info in info dict if they are missing'''
1084 for key, value in extra_info.items():
1085 info_dict.setdefault(key, value)
1086
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Extract the information for url using the first suitable extractor.

    Returns the result of the extraction, or None when no extractor is
    suitable (an error is reported) or when the video id derived from
    the URL is already recorded in the download archive.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """
    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        key = candidate.ie_key()
        extractor = self.get_info_extractor(key)
        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id without extracting, to consult the archive early
        try:
            if callable(getattr(extractor, 'extract_id', None)):
                raw_id = extractor.extract_id(url)
            else:
                raw_id = extractor._match_id(url)
            temp_id = str_or_none(raw_id)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                key, temp_id))
            break
        return self.__extract_info(url, extractor, download, extra_info, process)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1135
def __handle_extraction_exceptions(func):
    """Decorator: run func and turn extraction failures into reported errors.

    GeoRestrictedError and ExtractorError are reported through
    report_error. ThrottledDownload triggers a warning and a retry of
    the call. MaxDownloadsReached, ExistingVideoReached and
    RejectedVideoReached always propagate. Any other exception is
    reported only when 'ignoreerrors' is set and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as geo_err:
            lines = [geo_err.msg]
            if geo_err.countries:
                lines.append('This video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, geo_err.countries)))
            lines.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error('\n'.join(lines))
        except ExtractorError as ext_err:  # An error we somewhat expected
            self.report_error(compat_str(ext_err), ext_err.format_traceback())
        except ThrottledDownload:
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as err:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1161
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor ie on url and optionally process its result.

    Returns None when the extractor returns None, the processed result
    when process is true, and the raw extractor result otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1178
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL and extractor fields of ie_result that are not set yet.

    url supplies 'webpage_url', 'original_url' and 'webpage_url_basename';
    ie supplies 'extractor' and 'extractor_key'. Either may be None, in
    which case its fields are left alone.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1191
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Arguments:
    ie_result  -- the extractor result; its '_type' (default 'video')
                  selects how it is handled
    download   -- whether to download the resolved videos
    extra_info -- extra values added to results where they are missing

    Returns None for a playlist that is already being processed, and
    whatever extract_info returned (possibly None) for URL results.
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print what is known without resolving the URL
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Outer (embedding page) values override the inner ones, except
        # for the fields identifying the inner result itself
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized: '%' binds tighter than 'or', so without the
            # parentheses a missing title would be printed as "None"
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                # Back at the outermost playlist: forget the visited URLs
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1312
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for path, with report_error as its error callback."""
    on_error = self.report_error
    return make_dir(path, on_error)
1315
def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    The entries are narrowed down according to the 'playlist_items',
    'playliststart' and 'playlistend' parameters, optional playlist-level
    files (info json, thumbnails, description) are written, and each
    remaining entry is processed in turn.

    Returns ie_result with 'entries' replaced by the processed results,
    or None when a playlist-level file could not be prepared or written.
    Raises EntryNotInPlaylist when 'entries' is missing, or when a
    requested entry is missing from an incomplete entries list.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Place each entry at its 1-based playlist index, padding with None.
            # max() takes the iterable itself: unpacking it fails with
            # TypeError when there is exactly one index
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # spec is a comma-separated list of indexes and 'start-end' ranges
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield item
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    # msg keeps one '%d' placeholder for the number of selected entries
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                # Only the break_on_* exceptions matter here; the verdict
                # itself is evaluated again in the download loop below
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist_index' in self.params.get('compat_options', []):
            # Compat mode: index by position after re-ordering
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1487
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry through process_ie_result.

    entry is the raw entry, download is forwarded unchanged and
    extra_info carries the playlist-level fields to attach to the entry.
    Returns whatever process_ie_result returns.
    NOTE(review): the decorator presumably turns extraction errors into a
    falsy result (the playlist loop counts falsy results as failures) -
    confirm against its definition.
    """
    processed_entry = self.process_ie_result(
        entry, extra_info=extra_info, download=download)
    return processed_entry
1492
67134eab
JMF
1493 def _build_format_filter(self, filter_spec):
1494 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1495
1496 OPERATORS = {
1497 '<': operator.lt,
1498 '<=': operator.le,
1499 '>': operator.gt,
1500 '>=': operator.ge,
1501 '=': operator.eq,
1502 '!=': operator.ne,
1503 }
67134eab 1504 operator_rex = re.compile(r'''(?x)\s*
187986a8 1505 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1506 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1507 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1508 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1509 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1510 if m:
1511 try:
1512 comparison_value = int(m.group('value'))
1513 except ValueError:
1514 comparison_value = parse_filesize(m.group('value'))
1515 if comparison_value is None:
1516 comparison_value = parse_filesize(m.group('value') + 'B')
1517 if comparison_value is None:
1518 raise ValueError(
1519 'Invalid value %r in format specification %r' % (
67134eab 1520 m.group('value'), filter_spec))
9ddb6925
S
1521 op = OPERATORS[m.group('op')]
1522
083c9df9 1523 if not m:
9ddb6925
S
1524 STR_OPERATORS = {
1525 '=': operator.eq,
10d33b34
YCH
1526 '^=': lambda attr, value: attr.startswith(value),
1527 '$=': lambda attr, value: attr.endswith(value),
1528 '*=': lambda attr, value: value in attr,
9ddb6925 1529 }
187986a8 1530 str_operator_rex = re.compile(r'''(?x)\s*
1531 (?P<key>[a-zA-Z0-9._-]+)\s*
1532 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1533 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1534 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1535 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1536 if m:
1537 comparison_value = m.group('value')
2cc779f4
S
1538 str_op = STR_OPERATORS[m.group('op')]
1539 if m.group('negation'):
e118a879 1540 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1541 else:
1542 op = str_op
083c9df9 1543
9ddb6925 1544 if not m:
187986a8 1545 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1546
1547 def _filter(f):
1548 actual_value = f.get(m.group('key'))
1549 if actual_value is None:
1550 return m.group('none_inclusive')
1551 return op(actual_value, comparison_value)
67134eab
JMF
1552 return _filter
1553
0017d9ad 1554 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1555
af0f7428
S
1556 def can_merge():
1557 merger = FFmpegMergerPP(self)
1558 return merger.available and merger.can_merge()
1559
91ebc640 1560 prefer_best = (
1561 not self.params.get('simulate', False)
1562 and download
1563 and (
1564 not can_merge()
19807826 1565 or info_dict.get('is_live', False)
de6000d9 1566 or self.outtmpl_dict['default'] == '-'))
53ed7066 1567 compat = (
1568 prefer_best
1569 or self.params.get('allow_multiple_audio_streams', False)
1570 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1571
1572 return (
53ed7066 1573 'best/bestvideo+bestaudio' if prefer_best
1574 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1575 else 'bestvideo+bestaudio/best')
0017d9ad 1576
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    format_spec is the user-facing expression, built from format atoms
    ('best', 'bv*', 'mp4', a format id, 'all', 'mergeall', ...) combined
    with ',' (select several), '/' (first alternative that yields
    something), '+' (merge), '(...)' (grouping) and '[...]' (filters).

    Returns a function that takes a context dict with the keys 'formats'
    and 'incomplete_formats' and returns an iterable of the selected
    (possibly merged) format dicts.

    Raises SyntaxError when the expression cannot be tokenized or parsed.
    """
    def syntax_error(note, start):
        # start is a (row, column) token position; the column places the caret
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return the raw filter text
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; returns a list of FormatSelector tuples.
        # The inside_* flags tell a nested call which operators belong to
        # its caller and must be handed back via restore_last_token().
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter with no preceding atom applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first format providing each restricted stream kind.
            # A new list is built on purpose: removing items from the list
            # being enumerated skips the element following each removal and
            # could remove two entries for a single audio+video format.
            stream_taken = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    # Carries neither audio nor video; nothing to merge
                    continue
                restricted_kinds = [
                    aud_vid for aud_vid in ('audio', 'video')
                    if not allow_multiple_streams[aud_vid]
                    and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                if any(stream_taken[aud_vid] for aud_vid in restricted_kinds):
                    continue
                for aud_vid in restricted_kinds:
                    stream_taken[aud_vid] = True
                kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Lazily yield only the formats for which a test download succeeds
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            temp_file = tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None)
            temp_file.close()
            try:
                dl, _ = self.dl(temp_file.name, f, test=True)
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
                dl = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if dl:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function of ctx
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if check_formats:
                        formats = _check_formats(formats)
                    for f in formats:
                        yield f
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = ctx['formats']
                    if check_formats:
                        formats = list(_check_formats(formats))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    # Formats with neither audio nor video never match best/worst
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    if format_reverse:
                        matches = matches[::-1]
                    if check_formats:
                        # Only test as many formats as needed to reach the requested index
                        matches = list(itertools.islice(_check_formats(matches), format_idx))
                    n = len(matches)
                    if -n <= format_idx - 1 < n:
                        yield matches[format_idx - 1]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a private copy so the caller's ctx is left untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1914
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers dict for a format/info dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers'],
    sets 'Cookie' from _calc_cookies when that returns something, and adds
    'X-Forwarded-For' from the private '__x_forwarded_for_ip' field unless
    that header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly provided X-Forwarded-For header always wins
    forwarded_ip = (
        None if 'X-Forwarded-For' in headers
        else info_dict.get('__x_forwarded_for_ip'))
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1932
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header that self.cookiejar adds for info_dict['url'].

    A throwaway request for the URL is built only so the cookie jar can
    attach its header to it; the request is never sent from here.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1937
b0249bca 1938 def _sanitize_thumbnails(self, info_dict):
bc516a3f 1939 thumbnails = info_dict.get('thumbnails')
1940 if thumbnails is None:
1941 thumbnail = info_dict.get('thumbnail')
1942 if thumbnail:
1943 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1944 if thumbnails:
1945 thumbnails.sort(key=lambda t: (
1946 t.get('preference') if t.get('preference') is not None else -1,
1947 t.get('width') if t.get('width') is not None else -1,
1948 t.get('height') if t.get('height') is not None else -1,
1949 t.get('id') if t.get('id') is not None else '',
1950 t.get('url')))
b0249bca 1951
1952 def test_thumbnail(t):
1953 self.to_screen('[info] Testing thumbnail %s' % t['id'])
1954 try:
1955 self.urlopen(HEADRequest(t['url']))
1956 except network_exceptions as err:
1957 self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1958 t['id'], t['url'], error_to_compat_str(err)))
1959 return False
1960 return True
1961
bc516a3f 1962 for i, t in enumerate(thumbnails):
bc516a3f 1963 if t.get('id') is None:
1964 t['id'] = '%d' % i
b0249bca 1965 if t.get('width') and t.get('height'):
1966 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1967 t['url'] = sanitize_url(t['url'])
1968 if self.params.get('check_formats'):
1969 info_dict['thumbnails'] = reversed(LazyList(filter(test_thumbnail, thumbnails[::-1])))
bc516a3f 1970
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video info dict, select formats and process them.

    info_dict is the extractor result for one video; it is modified in
    place (fields are normalized and derived fields are added).
    When download is true, process_info is called once for every selected
    format.

    Returns the updated info_dict (merged with the last selected format),
    or None when one of the list_thumbnails / listsubtitles / listformats
    options caused a listing to be printed instead.

    Raises ExtractorError when 'id' or 'title' is missing, or when no
    (requested) format is available and 'ignore_no_formats_error' is unset.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce a non-string value to a string in place, with a warning
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Coerce every non-numeric value of a known numeric field via int_or_none
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    # Make sure 'thumbnail' is set: sanitize the given one, else use the
    # last entry of the thumbnails list
    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive the YYYYMMDD date fields from their timestamps when missing
    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                info_dict[date_key] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    # Sanitize subtitle URLs and fill in missing extensions from the URL
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    # NOTE(review): this indexes 'url' even when it is absent,
                    # so an entry lacking both 'url' and 'ext' raises KeyError
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('No video formats found!')
        else:
            self.report_warning('No video formats found!')

    def is_wellformed(f):
        # A format is usable only if it has a non-empty 'url'
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Maps each format_id to the list of formats sharing it
    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    # Fall back to the default format spec when no selector was configured
    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('Requested format is not available', expected=True)
        else:
            self.report_warning('Requested format is not available')
    elif download:
        self.to_screen(
            '[info] %s: Downloading %d format(s): %s' % (
                info_dict['id'], len(formats_to_download),
                ", ".join([f['format_id'] for f in formats_to_download])))
        for fmt in formats_to_download:
            # Each selected format is processed on its own shallow copy
            new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            new_info['__original_infodict'] = info_dict
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2208
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each with at least an 'ext'); either may be
    None. Normal subtitles are considered only with the 'writesubtitles'
    option and automatic captions only with 'writeautomaticsub'; for a
    language present in both, the normal subtitles win.

    Languages are chosen from 'allsubtitles' or 'subtitleslangs' (a list
    of regexes, where 'all' selects everything and a leading '-' removes
    matches), else 'en' if available, else the first available language.
    The result keeps the order in which languages were selected.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles are available/requested at all.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # Stays empty unless at least one of the write* options is enabled
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # An ordered, duplicate-free list (rather than a set) keeps the
        # selection order deterministic
        requested_langs = []
        for lang_spec in self.params.get('subtitleslangs'):
            if lang_spec == 'all':
                matching_langs = all_sub_langs
                discard = False
            else:
                # startswith() is safe for an empty spec, unlike lang_spec[0]
                discard = lang_spec.startswith('-')
                lang_rex = re.compile((lang_spec[1:] if discard else lang_spec) + '$')
                matching_langs = [lang for lang in all_sub_langs if lang_rex.match(lang)]
            if discard:
                requested_langs = [lang for lang in requested_langs if lang not in matching_langs]
            else:
                for lang in matching_langs:
                    if lang not in requested_langs:
                        requested_langs.append(lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is as unusable as a missing entry (formats[-1] would fail)
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [sub_format for sub_format in formats if sub_format['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred extensions exist: fall back to the last format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2271
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the 'force*' params to stdout.

    Works on a shallow copy of info_dict that additionally carries
    'filename' (when given) and a derived 'urls' entry.
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename
    requested = info.get('requested_formats')
    if requested is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(fmt['url'] + fmt.get('play_path', '') for fmt in requested)
    elif 'url' in info:
        info['urls'] = info['url'] + info.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        key = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        # A missing value is only tolerated while the info is still incomplete
        if incomplete and info.get(key) is None:
            return
        self.to_stdout(info[key])

    def print_optional(field):
        if is_forced(field) and info.get(field) is not None:
            self.to_stdout(info[field])

    for tmpl in self.params.get('forceprint', []):
        if re.match(r'\w+$', tmpl):
            # A bare field name is shorthand for the matching template
            tmpl = '%({})s'.format(tmpl)
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info)
        self.to_stdout(tmpl % info_copy)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration', False) and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    print_mandatory('format')

    if self.params.get('forcejson', False):
        self.post_extract(info)
        self.to_stdout(json.dumps(info, default=repr))
d06daf23 2313
def dl(self, name, info, subtitle=False, test=False):
    """Run a suitable downloader for `info`, writing to `name`.

    In test mode a dedicated, self-contained set of downloader params is
    used and no progress hooks are attached. Returns whatever the
    downloader's download() returns.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params)
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

    # Fall back to the single 'url' when there are no requested formats
    url_list = [fmt['url'] for fmt in info.get('requested_formats', [])] or [info['url']]
    self.write_debug('Invoking downloader on "%s"' % '", "'.join(url_list))

    # Work on a copy so that the computed headers do not leak into `info`
    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2341
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces 'max_downloads', applies the match filter, computes
    the output filenames, performs the forced printings, writes the
    requested side files (description, annotations, subtitles, info JSON,
    thumbnails, internet shortcuts), runs the 'before_dl' processors,
    downloads (and, if needed, schedules merging of) the media, applies
    fixups and postprocessors, and finally records the download archive.

    Raises MaxDownloadsReached when the download limit has been hit and
    UnavailableVideoError on OS-level errors during the download.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only when writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # An existing file is kept only when overwriting is disabled,
        # exactly like the other side files written above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                """Return an already present file to reuse, or None.

                When overwriting is enabled, the files found are deleted
                and None is returned instead.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    if (info_dict['ext'] == 'webm'
                            and self.params.get('writethumbnail', False)
                            and info_dict.get('thumbnails')):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                def correct_ext(filename):
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == old_ext
                        else filename)
                    return '%s.%s' % (filename_wo_ext, info_dict['ext'])

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = (
                    'no-direct-merge' not in self.params.get('compat_opts', [])
                    and info_dict.get('protocol') is not None  # All requested formats have same protocol
                    and not self.params.get('allow_unplayable_formats')
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
                if directly_mergable:
                    info_dict['url'] = requested_formats[0]['url']
                    # Treat it as a single download
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None:
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if dl_filename is None:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info, 'temp'),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            for file in downloaded:
                                files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                """Queue the ffmpeg fixup postprocessors allowed by the 'fixup' policy."""
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2758
def download(self, url_list):
    """Download a given list of URLs and return the download return code."""
    outtmpl = self.outtmpl_dict['default']
    several_urls = len(url_list) > 1
    fixed_name = outtmpl != '-' and '%' not in outtmpl
    # A template without any field would make every URL write to the same file
    if several_urls and fixed_name and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached) as err:
            stop_messages = (
                (MaxDownloadsReached,
                 '[info] Maximum number of downloaded files reached'),
                (ExistingVideoReached,
                 '[info] Encountered a file that is already in the archive, stopping due to --break-on-existing'),
                (RejectedVideoReached,
                 '[info] Encountered a file that did not match filter, stopping due to --break-on-reject'),
            )
            for exc_type, message in stop_messages:
                if isinstance(err, exc_type):
                    self.to_screen(message)
                    break
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.post_extract(res)
                self.to_stdout(json.dumps(res, default=repr))

    return self._download_retcode
2790
def download_with_info_file(self, info_filename):
    """Download using the info JSON stored in `info_filename`.

    If processing the stored info fails, the download is retried from the
    info's 'webpage_url' when one is present.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
    info = self.filter_requested_info(
        json.loads(raw_json), self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2807
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return a copy of info_dict with unwanted keys removed, recursively.

    With actually_filter set, keys starting with '_' (except '_type'), the
    request-specific keys listed below and keys holding an empty value are
    dropped. Otherwise only '__original_infodict' is dropped and an 'epoch'
    timestamp is stored into the given info_dict first.
    """
    remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
    # NB: this must be a plain list. A trailing comma here turns it into a
    # tuple holding a list, and '_type' then never matches it
    keep_keys = ['_type']  # Always keep this to facilitate load-info-json
    if actually_filter:
        remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        info_dict['epoch'] = int(time.time())

        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))

    return filter_fn(info_dict)
cb202fd2 2825
def run_pp(self, pp, infodict):
    """Run one postprocessor and dispose of the files it no longer needs.

    Files the postprocessor marks for deletion are either kept (and
    registered in '__files_to_move') when 'keepvideo' is set, or removed
    from disk. Returns the info dict produced by the postprocessor.
    """
    infodict.setdefault('__files_to_move', {})
    files_to_delete, infodict = pp.run(infodict)
    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        for kept in files_to_delete:
            infodict['__files_to_move'].setdefault(kept, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # Whether or not the removal worked, the file must not be moved later
        if old_filename in infodict['__files_to_move']:
            del infodict['__files_to_move'][old_filename]
    return infodict
5bfa4862 2847
277d6ff5 2848 @staticmethod
2849 def post_extract(info_dict):
2850 def actual_post_extract(info_dict):
2851 if info_dict.get('_type') in ('playlist', 'multi_video'):
2852 for video_dict in info_dict.get('entries', {}):
b050d210 2853 actual_post_extract(video_dict or {})
277d6ff5 2854 return
2855
07cce701 2856 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2857 extra = post_extractor().items()
2858 info_dict.update(extra)
07cce701 2859 info_dict.pop('__post_extractor', None)
277d6ff5 2860
4ec82a72 2861 original_infodict = info_dict.get('__original_infodict') or {}
2862 original_infodict.update(extra)
2863 original_infodict.pop('__post_extractor', None)
2864
b050d210 2865 actual_post_extract(info_dict or {})
277d6ff5 2866
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the processors registered under `key` on a copy of ie_info.

    Returns a (info, files_to_move) tuple; the '__files_to_move' entry is
    taken out of the returned info dict.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
5bfa4862 2873
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    The processors attached to the info dict run first, then the
    'post_process' ones, then the file mover and finally the 'after_move'
    ones. Returns the resulting info dict.
    """
    info = dict(ie_info)
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info
c1c9a79c 2887
def _make_archive_id(self, info_dict):
    """Return the '<extractor> <id>' archive entry for info_dict, or None.

    None is returned when the id is missing or when no extractor can be
    determined from the info dict or from its URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        matching_ie = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching_ie is None:
            return
        extractor = matching_ie.ie_key()
    return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2907
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already recorded in the download archive."""
    if self.params.get('download_archive') is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    # No archive id means incomplete video information
    return bool(vid_id) and vid_id in self.archive
c1c9a79c
PH
2918
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the archive file and the in-memory set.

    Does nothing when no 'download_archive' file is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    entry = vid_id + '\n'
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
    self.archive.add(vid_id)
dd82ffea 2928
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution description for a format dict.

    Formats without video are reported as 'audio only' (or 'images' when
    they have no audio either). An explicit 'resolution' value is used
    as-is; otherwise the string is built from 'width'/'height', falling
    back to `default`.
    """
    if format.get('vcodec') == 'none':
        return 'images' if format.get('acodec') == 'none' else 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
2946
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-text note describing a format dict.

    The note is assembled left to right from the fields that are present:
    language, format note, bitrates, container, codecs, fps, sample rate
    and (approximate) file size.
    """
    def with_sep(text, sep=', '):
        # A separator is only needed when something precedes the next part
        return text + sep if text else text

    note = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if fdict.get('language'):
        note = with_sep(note, ' ') + '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        note += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        note += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        note = with_sep(note) + '%s container' % fdict['container']

    vbr = fdict.get('vbr')
    abr = fdict.get('abr')

    if fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none':
        note = with_sep(note) + fdict['vcodec']
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr
    if fdict.get('fps') is not None:
        note = with_sep(note) + '%sfps' % fdict['fps']

    if fdict.get('acodec') is not None:
        note = with_sep(note)
        note += 'video only' if fdict['acodec'] == 'none' else '%-5s' % fdict['acodec']
    elif abr is not None:
        note = with_sep(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % abr
    if fdict.get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        note = with_sep(note) + format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        note = with_sep(note) + '~' + format_bytes(fdict['filesize_approx'])
    return note
91c7271a 3002
def _format_note_table(self, f):
    """Return the comma separated note shown for a format in the table listing."""
    fields = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        # A container equal to the extension is passed as a value to ignore
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join(field for field in fields if field != '')
3013
def list_formats(self, info_dict):
    """Print the formats available for ``info_dict`` via ``self.to_screen``.

    When ``info_dict`` has no 'formats' key, the dict itself is listed as
    the only format.  Formats whose 'preference' is below -1000 are left
    out.  The wide table layout is used unless 'list-formats' is among the
    'compat_opts' param or the 'list_formats_as_table' param is False; in
    that case a four-column layout with a free-form note is printed.
    """
    formats = info_dict.get('formats', [info_dict])
    use_table = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('list_formats_as_table', True) is not False)

    def is_listed(f):
        preference = f.get('preference')
        return preference is None or preference >= -1000

    def table_row(f):
        size = (
            format_field(f, 'filesize', ' %s', func=format_bytes)
            + format_field(f, 'filesize_approx', '~%s', func=format_bytes))
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            size,
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            self._format_note_table(f)]

    def compat_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f)]

    if use_table:
        build_row = table_row
        extra_gap = 0
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        build_row = compat_row
        extra_gap = 1
        header_line = ['format code', 'extension', 'resolution', 'note']

    table = [build_row(f) for f in formats if is_listed(f)]
    rendered = render_table(
        header_line,
        table,
        delim=use_table,
        extraGap=extra_gap,
        hideEmpty=use_table)
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], rendered))
cfb56d1a
PH
3059
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails listed in ``info_dict``.

    Reads ``info_dict['thumbnails']`` (an iterable of dicts with 'id' and
    'url', optionally 'width' and 'height') and ``info_dict['id']``.
    When there are no thumbnails - the key is missing, None or empty -
    a single "No thumbnails present" message is printed instead.
    """
    # ``or []`` guards against a missing/None value: list(None) would raise
    # TypeError before the "no thumbnails" message could be printed.
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3071
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the subtitle languages available for ``video_id``.

    ``subtitles`` maps a language code to a list of format dicts, each
    with an 'ext' and optionally a 'name'.  ``name`` is the label used in
    the messages.  When ``subtitles`` is empty, only a "has no ..." line
    is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    rows = []
    for lang, formats in subtitles.items():
        # Formats are listed in reverse order of the input list
        exts, names = zip(*(
            (fmt['ext'], fmt.get('name', 'unknown')) for fmt in reversed(formats)))
        # Collapse the name column when every format shares the same name
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        rows.append([lang, ', '.join(names), ', '.join(exts)])

    self.to_screen(render_table(
        ['Language', 'Name', 'Formats'],
        rows,
        hideEmpty=True))
a504ced0 3089
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    ``req`` may be a URL string, which is first wrapped with
    sanitized_Request, or any other object, which is handed to the opener
    as it is.  The request is opened with the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3095
def print_debug_header(self):
    """Write the '[debug]' header describing the runtime environment.

    Does nothing unless the 'verbose' param is truthy.  Otherwise it
    writes, in this order: the encodings in use, the program version and
    how it is packaged, lazy-loader / plugin / compatibility-option
    notices, the Git HEAD (best effort), the Python version and platform,
    the versions of the external executables, and the proxy map.  When the
    'call_home' param is set, the public IP address is fetched and
    written as well.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may lack an ``encoding`` attribute; in that case report
    # the type name of the object instead
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # How the program is packaged: frozen executable, zip archive, or run
    # from __main__.py; empty when none of these applies
    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    # Best effort: ask git for the short HEAD hash of the directory that
    # contains this file.  Any failure is deliberately ignored.
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        # Only report output that starts with hexadecimal digits
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # sys.exc_clear() exists only on Python 2; on Python 3 the
            # resulting AttributeError is swallowed just below
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Implementation name, with the PyPy version appended on PyPy
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    # Versions of the external programs; entries with a falsy version are
    # left out of the report
    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the ``proxies`` mapping of every opener handler that has one
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # NOTE: this return makes the version check below unreachable
        return
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3185
def _setup_opener(self):
    """Create the URL opener and store it as ``self._opener``.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params.  Also sets ``self._socket_timeout``
    (600 when no timeout is configured) and ``self.cookiejar``.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        # Only load the jar when the file is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy option means no proxies at all
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3238
def encode(self, s):
    """Return ``s`` as bytes, encoded with ``self.get_encoding()``.

    Bytes are returned untouched.  If the text cannot be encoded, the
    UnicodeEncodeError is re-raised with a hint appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    # Either already encoded on entry or encoded just above
    return s
3248
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3254
de6000d9 3255 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3256 write_all = self.params.get('write_all_thumbnails', False)
3257 thumbnails = []
3258 if write_all or self.params.get('writethumbnail', False):
0202b52a 3259 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3260 multiple = write_all and len(thumbnails) > 1
ec82d85a 3261
0202b52a 3262 ret = []
6c4fd172 3263 for t in thumbnails[::1 if write_all else -1]:
ec82d85a 3264 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3265 suffix = '%s.' % t['id'] if multiple else ''
3266 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3267 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3268
0c3d0f51 3269 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3270 ret.append(suffix + thumb_ext)
8ba87148 3271 t['filepath'] = thumb_filename
ec82d85a
PH
3272 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3273 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3274 else:
5ef7d9bd 3275 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3276 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3277 try:
3278 uf = self.urlopen(t['url'])
d3d89c32 3279 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3280 shutil.copyfileobj(uf, thumbf)
de6000d9 3281 ret.append(suffix + thumb_ext)
ec82d85a
PH
3282 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3283 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3284 t['filepath'] = thumb_filename
3158150c 3285 except network_exceptions as err:
ec82d85a 3286 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3287 (t['url'], error_to_compat_str(err)))
6c4fd172 3288 if ret and not write_all:
3289 break
0202b52a 3290 return ret