]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[utils] Generalize `traverse_dict` to `traverse_obj`
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import sys
23import time
67134eab 24import tokenize
8222d8de 25import traceback
75822ca7 26import random
8222d8de 27
961ea474 28from string import ascii_letters
e5813e53 29from zipimport import zipimporter
961ea474 30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
ce02ed60 51 determine_ext,
b5559424 52 determine_protocol,
732044af 53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 56 DownloadError,
c0384f22 57 encode_compat_str,
ce02ed60 58 encodeFilename,
498f5606 59 EntryNotInPlaylist,
a06916d9 60 error_to_compat_str,
8b0d7497 61 ExistingVideoReached,
590bc6f6 62 expand_path,
ce02ed60 63 ExtractorError,
e29663c6 64 float_or_none,
02dbf93f 65 format_bytes,
76d321f6 66 format_field,
752cda38 67 STR_FORMAT_RE,
525ef922 68 formatSeconds,
773f291d 69 GeoRestrictedError,
c9969434 70 int_or_none,
732044af 71 iri_to_uri,
773f291d 72 ISO3166Utils,
56a8fb4f 73 LazyList,
ce02ed60 74 locked_file,
0202b52a 75 make_dir,
dca08720 76 make_HTTPS_handler,
ce02ed60 77 MaxDownloadsReached,
3158150c 78 network_exceptions,
cd6fc19e 79 orderedSet,
a06916d9 80 OUTTMPL_TYPES,
b7ab0590 81 PagedList,
083c9df9 82 parse_filesize,
91410c9b 83 PerRequestProxyHandler,
dca08720 84 platform_name,
eedb7ba5 85 PostProcessingError,
ce02ed60 86 preferredencoding,
eedb7ba5 87 prepend_extension,
a06916d9 88 process_communicate_or_kill,
e8e73840 89 random_uuidv4,
51fb4995 90 register_socks_protocols,
a06916d9 91 RejectedVideoReached,
cfb56d1a 92 render_table,
eedb7ba5 93 replace_extension,
ce02ed60
PH
94 SameFileError,
95 sanitize_filename,
1bb5c511 96 sanitize_path,
dcf77cf1 97 sanitize_url,
67dda517 98 sanitized_Request,
e5660ee6 99 std_headers,
1211bb6d 100 str_or_none,
e29663c6 101 strftime_or_none,
ce02ed60 102 subtitles_filename,
732044af 103 to_high_limit_path,
324ad820 104 traverse_obj,
ce02ed60 105 UnavailableVideoError,
29eb5174 106 url_basename,
58b1f00d 107 version_tuple,
ce02ed60
PH
108 write_json_file,
109 write_string,
1bab3437 110 YoutubeDLCookieJar,
6a3f4c3f 111 YoutubeDLCookieProcessor,
dca08720 112 YoutubeDLHandler,
fca6dba8 113 YoutubeDLRedirectHandler,
ce02ed60 114)
a0e07d31 115from .cache import Cache
52a8a1e1 116from .extractor import (
117 gen_extractor_classes,
118 get_info_extractor,
119 _LAZY_LOADER,
120 _PLUGIN_CLASSES
121)
4c54b89e 122from .extractor.openload import PhantomJSwrapper
52a8a1e1 123from .downloader import (
124 get_suitable_downloader,
125 shorten_protocol_name
126)
4c83c967 127from .downloader.rtmp import rtmpdump_version
4f026faf 128from .postprocessor import (
f17f8651 129 FFmpegFixupM3u8PP,
62cd676c 130 FFmpegFixupM4aPP,
6271f1ca 131 FFmpegFixupStretchedPP,
4f026faf
PH
132 FFmpegMergerPP,
133 FFmpegPostProcessor,
0202b52a 134 # FFmpegSubtitlesConvertorPP,
4f026faf 135 get_postprocessor,
0202b52a 136 MoveFilesAfterDownloadPP,
4f026faf 137)
dca08720 138from .version import __version__
8222d8de 139
e9c0cdd3
YCH
140if compat_os_name == 'nt':
141 import ctypes
142
2459b6e1 143
8222d8de
JMF
144class YoutubeDL(object):
145 """YoutubeDL class.
146
147 YoutubeDL objects are the ones responsible for downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
153
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object hands it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL processes the extracted information, possibly using a File
160 Downloader to download the video.
161
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
168
169 Available options:
170
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
180940e0 173 videopassword: Password for accessing a video.
1da50aa3
S
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
8222d8de
JMF
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
ad8915b7 180 no_warnings: Do not print out anything for warnings.
53c18592 181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
8694c600 189 forcejson: Force printing info_dict as JSON.
63e0be34
PH
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
c25228e5 192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
8222d8de 194 simulate: Do not download the video files.
eb8a4433 195 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 197 ignore_no_formats_error: Ignore "No video formats" error. Useful for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
c25228e5 200 format_sort: How to sort the video formats. see "Sorting Formats"
201 for more details.
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
203 for more details.
204 allow_multiple_video_streams: Allow multiple video streams to be merged
205 into a single file
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
207 into a single file
4524baf0 208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 210 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
212 A string is also accepted for backward compatibility
a820dc72
RA
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
4524baf0 216 windowsfilenames: Force the filenames to be windows compatible
a820dc72 217 ignoreerrors: Do not stop on download errors
7a5c1cfe 218 (Default True when running yt-dlp,
a820dc72 219 but False when directly accessing YoutubeDL class)
26e2805c 220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
d22dec74 222 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
8222d8de
JMF
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
c14e88f0 228 playlist_items: Specific indices of playlist to download.
ff815fe6 229 playlistreverse: Download playlist items in reverse order.
75822ca7 230 playlistrandom: Download playlist items in random order.
8222d8de
JMF
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
8bf9319e 233 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 237 clean_infojson: Remove private fields from the infojson
06167fbb 238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
1fb07d10 240 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 241 writethumbnail: Write the thumbnail image to a file
c25228e5 242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
ec82d85a 244 write_all_thumbnails: Write all thumbnail formats to files
732044af 245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 250 writesubtitles: Write the video subtitles to a file
741dd8ea 251 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 252 allsubtitles: Deprecated - Use subtitlelangs = ['all']
253 Downloads all the subtitles of the video
0b7f3118 254 (requires writesubtitles or writeautomaticsub)
8222d8de 255 listsubtitles: Lists all available subtitles for the video
a504ced0 256 subtitlesformat: The format code for subtitles
c32b0aab 257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
c35f9e72 264 cachedir: Location of the cache files in the filesystem.
a0e07d31 265 False to disable filesystem cache.
47192f92 266 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
275 downloaded.
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
279 Videos already present in the file are not downloaded
280 again.
8a51f564 281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 286 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
a1ee09e8 289 proxy: URL of the proxy server to use
38cce791 290 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 291 on geo-restricted sites.
e344693b 292 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
293 bidi_workaround: Work around buggy terminals without bidirectional text
294 support, using fribidi
a0ddb8a2 295 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 296 include_ads: Download ads as well
04b4d394
PH
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
62fec3b2 299 encoding: Use this encoding instead of the system-specified.
e8ee972c 300 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
301 Pass in 'in_playlist' to only show this behavior for
302 playlist items.
4f026faf 303 postprocessors: A list of dictionaries, each with an entry
71b640cc 304 * key: The name of the postprocessor. See
7a5c1cfe 305 yt_dlp/postprocessor/__init__.py for a list.
56d868db 306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
ab8e5e51
AM
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
71b640cc
PH
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
5cda4eda 314 * status: One of "downloading", "error", or "finished".
ee69b99a 315 Check this first and ignore unknown values.
71b640cc 316
5cda4eda 317 If status is one of "downloading", or "finished", the
ee69b99a
PH
318 following properties may also be present:
319 * filename: The final filename (always present)
5cda4eda 320 * tmpfilename: The filename we're currently writing to
71b640cc
PH
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
323 * total_bytes_estimate: Guess of the eventual file size,
324 None if unavailable.
325 * elapsed: The number of seconds since download started.
71b640cc
PH
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
328 unknown
5cda4eda
PH
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
71b640cc
PH
333
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
45598f15 336 merge_output_format: Extension to use when merging formats.
6b591b29 337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
6271f1ca
PH
340 fixup: Automatically correct known faults of the file.
341 One of:
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
62cd676c 345 about it, warn otherwise (default)
504f20dd 346 source_address: Client-side IP address to bind to.
6ec6cb4e 347 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 348 yt-dlp servers for debugging. (BROKEN)
1cf376f5 349 sleep_interval_requests: Number of seconds to sleep between requests
350 during extraction
7aa589a5
S
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
355 max_sleep_interval.
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
1cf376f5 361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
364 match_filter: A function that gets called with the info_dict of
365 every video.
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
7e5db8c9 369 no_color: Do not emit color codes in output.
0a840f58 370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 371 HTTP header
0a840f58 372 geo_bypass_country:
773f291d
S
373 Two-letter ISO 3166-1 alpha-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
504f20dd 375 X-Forwarded-For HTTP header
5f95927a
S
376 geo_bypass_ip_block:
377 IP range in CIDR notation that will be used similarly to
504f20dd 378 geo_bypass_country
71b640cc 379
85729c51 380 The following options determine which downloader is picked:
52a8a1e1 381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
53ed7066 390 compat_opts: Compatibility options. See "Differences in default behavior".
18e674b4 391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
53ed7066 394 and no-youtube-unavailable-videos works when used via the API
fe7e0c98 395
8222d8de 396 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 397 the downloader (see yt_dlp/downloader/common.py):
8222d8de 398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 401 http_chunk_size.
76b1bd67
JMF
402
403 The following options are used by the post processors:
d4a24f40 404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 405 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
43820c03 408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to be passed to all PP
e409895f 413
414 The following options are used by the extractors:
62bff2c1 415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 417 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 418 discontinuities such as ad breaks (default: False)
3600fd59 419 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
e409895f 423 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
8222d8de
JMF
427 """
428
c9969434
S
# Names of fields that are expected to hold numbers.
# NOTE(review): presumably consulted when formatting output templates - the
# consumer is not visible in this part of the file.
_NUMERIC_FIELDS = {
    # Stream properties
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    # Dates
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # Counters and ratings
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # Positions
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
}

# Class-level defaults. __init__ rebinds most of these on every instance.
params = None
_ies = []
# One list of postprocessors per stage at which they can run
_pps = {
    'pre_process': [],
    'before_dl': [],
    'after_move': [],
    'post_process': [],
}
__prepare_filename_warned = False
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
450
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params:     Dictionary of options (see the class docstring). It is
                copied into self.params, so the caller's dict is not
                modified by the defaults applied here.
    auto_init:  If true, print the debug header and register the default
                InfoExtractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # The class-level defaults include a mutable set; give every instance
    # its own playlist state so that it is not shared between downloaders
    self._playlist_level = 0
    self._playlist_urls = set()
    # A conditional (rather than indexing a list with the option value)
    # so that a 'logtostderr' of None is treated as False
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
            'Update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Only fall back to the old option if the new one is not given
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the option being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first; use fribidi if bidiv cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the dictionaries given by the caller stay intact
        pp_def = dict(pp_def_raw)
        pp_class = get_postprocessor(pp_def.pop('key'))
        when = pp_def.pop('when', 'post_process')
        # Whatever remains are the keyword arguments of the postprocessor
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
585
7d4111ed
PH
586 def warn_if_short_id(self, argv):
587 # short YouTube ID starting with dash?
588 idxs = [
589 i for i, a in enumerate(argv)
590 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
591 if idxs:
592 correct_argv = (
7a5c1cfe 593 ['yt-dlp']
3089bc74
S
594 + [a for i, a in enumerate(argv) if i not in idxs]
595 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
596 )
597 self.report_warning(
598 'Long argument string detected. '
599 'Use -- to separate parameters and URLs, like this:\n%s\n' %
600 args_to_str(correct_argv))
601
8222d8de
JMF
602 def add_info_extractor(self, ie):
603 """Add an InfoExtractor object to the end of the list."""
604 self._ies.append(ie)
e52d7f85
JMF
605 if not isinstance(ie, type):
606 self._ies_instances[ie.ie_key()] = ie
607 ie.set_downloader(self)
8222d8de 608
56c73665
JMF
609 def get_info_extractor(self, ie_key):
610 """
611 Get an instance of an IE with name ie_key, it will try to get one from
612 the _ies list, if there's no instance it will create a new one and add
613 it to the extractor list.
614 """
615 ie = self._ies_instances.get(ie_key)
616 if ie is None:
617 ie = get_info_extractor(ie_key)()
618 self.add_info_extractor(ie)
619 return ie
620
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every InfoExtractor class yielded by gen_extractor_classes to the
    end of the list
    """
    register = self.add_info_extractor
    for extractor_class in gen_extractor_classes():
        register(extractor_class)
627
56d868db 628 def add_post_processor(self, pp, when='post_process'):
8222d8de 629 """Add a PostProcessor object to the end of the chain."""
5bfa4862 630 self._pps[when].append(pp)
8222d8de
JMF
631 pp.set_downloader(self)
632
ab8e5e51
AM
633 def add_post_hook(self, ph):
634 """Add the post hook"""
635 self._post_hooks.append(ph)
636
933605d7
JMF
637 def add_progress_hook(self, ph):
638 """Add the progress hook (currently only for the file downloader)"""
639 self._progress_hooks.append(ph)
8ab470f1 640
1c088fa8 641 def _bidi_workaround(self, message):
5d681e96 642 if not hasattr(self, '_output_channel'):
1c088fa8
PH
643 return message
644
5d681e96 645 assert hasattr(self, '_output_process')
11b85ce6 646 assert isinstance(message, compat_str)
6febd1c1
PH
647 line_count = message.count('\n') + 1
648 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 649 self._output_process.stdin.flush()
6febd1c1 650 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 651 for _ in range(line_count))
6febd1c1 652 return res[:-len('\n')]
1c088fa8 653
def _write_string(self, s, out=None):
    """Write s to out using the encoding given in the 'encoding' param"""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 656
848887eb 657 def to_stdout(self, message, skip_eol=False, quiet=False):
0760b0a7 658 """Print message to stdout"""
8bf9319e 659 if self.params.get('logger'):
43afe285 660 self.params['logger'].debug(message)
835a1478 661 elif not quiet or self.params.get('verbose'):
662 self._write_string(
663 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
664 self._err_file if quiet else self._screen_file)
8222d8de
JMF
665
def to_stderr(self, message):
    """Print message to stderr, or to the logger's error() if one is configured"""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
8222d8de 673
1e5b9a95
PH
674 def to_console_title(self, message):
675 if not self.params.get('consoletitle', False):
676 return
4bede0d8
C
677 if compat_os_name == 'nt':
678 if ctypes.windll.kernel32.GetConsoleWindow():
679 # c_wchar_p() might not be necessary if `message` is
680 # already of type unicode()
681 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 682 elif 'TERM' in os.environ:
b46696bd 683 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 684
bdde425c
PH
685 def save_console_title(self):
686 if not self.params.get('consoletitle', False):
687 return
94c3442e
S
688 if self.params.get('simulate', False):
689 return
4bede0d8 690 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 691 # Save the title on stack
734f90bb 692 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
693
694 def restore_console_title(self):
695 if not self.params.get('consoletitle', False):
696 return
94c3442e
S
697 if self.params.get('simulate', False):
698 return
4bede0d8 699 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 700 # Restore the title from stack
734f90bb 701 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
f89197d7 709
dca08720 710 if self.params.get('cookiefile') is not None:
1bab3437 711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 712
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed first. In verbose mode a traceback is
    printed as well: `tb` when given, otherwise one built from the exception
    being handled or, failing that, from the current call stack.

    Unless the 'ignoreerrors' param is set, a DownloadError is then raised;
    otherwise the download return code is set to 1.
    """
    def nested_exc_info():
        # exc_info tuple carried by the exception being handled, or None
        active = sys.exc_info()[1]
        if hasattr(active, 'exc_info') and active.exc_info[0]:
            return active.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                wrapped = nested_exc_info()
                tb = ''.join(traceback.format_exception(*wrapped)) if wrapped else ''
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    wrapped = nested_exc_info() if sys.exc_info()[0] else None
    raise DownloadError(message, wrapped or sys.exc_info())
743
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet)
748
8222d8de
JMF
def report_warning(self, message):
    '''
    Emit a warning. With a 'logger' param the message goes to its warning()
    method as-is. Otherwise, unless 'no_warnings' is set, it is printed to
    stderr prefixed with 'WARNING:' (colored when stderr is a tty, colors
    are not disabled and the OS is not Windows).
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    prefix = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
8222d8de
JMF
765
def report_error(self, message, tb=None):
    '''
    Hand the message over to trouble() prefixed with 'ERROR:'. The prefix is
    colored red when stderr is a tty, colors are not disabled and the OS is
    not Windows.
    '''
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    prefix = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
777
def write_debug(self, message):
    '''In verbose mode, send a '[debug] '-prefixed message to the logger if set, else write it out directly'''
    if not self.params.get('verbose', False):
        return
    debug_line = '[debug] %s' % message
    if not self.params.get('logger'):
        self._write_string('%s\n' % debug_line)
        return
    self.params['logger'].debug(debug_line)
787
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that `file_name` has already been fully downloaded."""
    named_notice = '[download] %s has already been downloaded'
    try:
        self.to_screen(named_notice % file_name)
    except UnicodeEncodeError:
        # Fall back to a notice that does not embed the file name
        self.to_screen('[download] The file has already been downloaded')
8222d8de 794
def report_file_delete(self, file_name):
    """Tell the user that the existing file `file_name` is about to be deleted."""
    named_notice = 'Deleting existing file %s'
    try:
        self.to_screen(named_notice % file_name)
    except UnicodeEncodeError:
        # Fall back to a notice that does not embed the file name
        self.to_screen('Deleting existing file')
0c3d0f51 801
def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict of templates, with missing/empty entries filled from DEFAULT_OUTTMPL.

    A non-dict 'outtmpl' value is treated as the 'default' template. When the
    param is already a dict, that same dict is updated and returned. A warning
    is issued for every template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for name, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(name):
            templates[name] = fallback
    for template in templates.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
815
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    Arguments:
    outtmpl   -- the output template string
    info_dict -- the info dict; it is copied, the caller's dict is not modified
    sanitize  -- optional callable (field_name, value) applied to every value
                 before it is stored. field_name is the last component of the
                 traversed field path (e.g. 'id' for '%(id)s'), so the callback
                 can recognize ID-like fields.

    Returns a tuple (new_outtmpl, tmpl_dict) such that new_outtmpl % tmpl_dict
    performs the substitution.
    """
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{0})
        (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(FIELD_RE))
    MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    tmpl_dict = {}

    def get_value(mdict):
        # Object traversal
        fields = mdict['fields'].split('.')
        value = traverse_obj(info_dict, fields)
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        if mdict['maths']:
            value = float_or_none(value)
            # The split yields alternating operator / operand items;
            # math_op holds the operator waiting for its operand
            math_op = None
            for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
                if item == '' or value is None:
                    return None
                if math_op:
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Not a number literal: treat the operand as another field
                        offset = float_or_none(traverse_obj(info_dict, item.split('.')))
                    try:
                        value = math_op(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    math_op = None
                else:
                    math_op = MATH_FUNCTIONS[item]
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            # No key in this match: return it with an extra '%' prepended
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default = None, na
            field_name = key
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)
            field_name = mobj['fields'].split('.')[-1]

        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value
        # The same key may appear with several formats; make the dict key unique
        key += '\0%s' % fmt

        if fmt[-1] not in 'crs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            # Pass the plain field name, not the '\0'-suffixed dict key, so
            # that the callback can still detect 'id'/'*_id' fields
            value = sanitize(field_name, value)
        tmpl_dict[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
143db31d 923
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Build the file name for `info_dict` from the output template of type `tmpl_type`.

    Falls back to the 'default' template when `tmpl_type` has none. Returns
    the file name, or None (after reporting the error) if substitution
    raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most the last two dot-separated parts (sub-extension
            # and extension). Everything before them is the stem that gets
            # trimmed; a name without dots is trimmed as a whole.
            name_parts = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [name_parts[0][:trim_file_name]] + name_parts[1:]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
964
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The name built from the output template is joined onto the 'home' entry
    of the 'paths' param and, when `dir_type` is given, onto that type's
    entry as well. '-' and empty results are returned untouched. With `warn`
    set, a one-time warning explains when 'paths' is being ignored.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if paths:
            if filename == '-':
                self.report_warning('--paths is ignored when an outputting to stdout')
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if not filename or filename == '-':
        return filename

    home_dir = expand_path(paths.get('home', '').strip())
    assert isinstance(home_dir, compat_str)
    if dir_type:
        sub_dir = expand_path(paths.get(dir_type, '').strip())
    else:
        sub_dir = ''
    assert isinstance(sub_dir, compat_str)
    full_path = os.path.join(home_dir, sub_dir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        full_path = encodeFilename(full_path, True).decode(preferredencoding())
    return sanitize_path(full_path, force=self.params.get('windowsfilenames'))
0202b52a 994
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded, otherwise the reason for skipping it

    The reason is also printed unless `silent` is set. When the matching
    'break_on_existing' / 'break_on_reject' param is enabled, the
    corresponding exception is raised instead of returning.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        params = self.params
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            wanted = params.get('matchtitle', False)
            if wanted and not re.search(wanted, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted + '"'
            unwanted = params.get('rejecttitle', False)
            if unwanted and re.search(unwanted, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            min_views = params.get('min_views')
            if min_views is not None and views < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, min_views)
            max_views = params.get('max_views')
            if max_views is not None and views > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, max_views)

        if age_restricted(info_dict.get('age_limit'), params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # The user-supplied filter is only consulted for complete entries
        if incomplete:
            return None
        match_filter = params.get('match_filter')
        if match_filter is None:
            return None
        verdict = match_filter(info_dict)
        return verdict if verdict is not None else None

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1048
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict does not have yet'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
1054
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information from `url` using the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
                  (defaults to an empty dictionary)
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns whatever the extraction step returns for the chosen extractor.
    Returns None when the URL's id is already recorded in the download
    archive, or when no extractor is suitable (an error is reported then).
    """
    # Use a fresh dict per call rather than a shared mutable default
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to get the id from the URL alone, to consult the archive
        # before doing any extraction
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1103
def __handle_extraction_exceptions(func):
    """Decorator for extraction methods: report expected errors through report_error instead of letting them propagate.

    MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached are
    always re-raised. Any other exception is reported only when the
    'ignoreerrors' param is set, and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as geo_err:
            availability = (
                '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, geo_err.countries))
                if geo_err.countries else '')
            self.report_error(
                geo_err.msg + availability
                + '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
        except ExtractorError as extractor_err:  # An error we somewhat expected
            self.report_error(compat_str(extractor_err), extractor_err.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as err:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1125
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url`; return the result processed (when `process` is set) or raw."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1142
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and URL related fields of `ie_result` that are not set yet"""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'original_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1151
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Arguments:
    ie_result  -- the result dict; its '_type' key (default 'video') selects
                  the handling
    download   -- whether to download the videos
    extra_info -- dictionary with extra values to add to the results
                  (defaults to an empty dictionary)

    Raises an Exception for an unknown '_type'.
    """
    # Use a fresh dict per call rather than a shared mutable default
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print what is known and do not resolve the URL
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            self.add_default_extra_info(
                info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized so that the id is used when there is no title
            # ('%' binds tighter than 'or')
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                # Back at the outermost playlist: forget the visited URLs
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1272
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path`, with report_error as its error callback; return make_dir's result"""
    on_error = self.report_error
    return make_dir(path, on_error)
1275
def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    Honors the 'playliststart', 'playlistend', 'playlist_items',
    'playlistreverse' and 'playlistrandom' params, optionally writes the
    playlist-level files (info json, thumbnails, description) and then
    processes every selected entry.

    Returns `ie_result` with 'entries' replaced by the processed results, or
    None when a directory for a playlist file cannot be created or the
    playlist description cannot be written.
    Raises EntryNotInPlaylist when `ie_result` has no 'entries' or when a
    requested entry is missing from an incomplete entry list.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Put every entry at its 1-based playlist position, leaving
            # None in the gaps. max() takes the iterable itself so that a
            # single requested index works as well.
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # spec is a comma separated list of indexes and 'start-end' ranges
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                # Only the break_on_* exceptions matter here; the actual
                # filtering happens in the download loop below
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist_index' in self.params.get('compat_options', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1447
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry through process_ie_result, under the extraction error handling decorator"""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1452
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """ Returns a function to filter the formats according to the filter_spec

    filter_spec is a single comparison such as 'height<=720' or 'ext=mp4'.
    It is first tried as a numeric comparison on one of a fixed set of keys
    and then as a string comparison on any key. A '?' after the operator
    makes the returned function give a truthy result for formats that lack
    the key.

    Raises ValueError if filter_spec matches neither form or if its numeric
    value cannot be parsed.
    """

    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    if m:
        try:
            comparison_value = int(m.group('value'))
        except ValueError:
            # Not a plain integer: try it as a file size, first as written
            # and then with a 'B' appended
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
        op = OPERATORS[m.group('op')]

    if not m:
        # Not a numeric comparison: try the string operators
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
        if m:
            comparison_value = m.group('value')
            str_op = STR_OPERATORS[m.group('op')]
            if m.group('negation'):
                # A '!' before the operator inverts the comparison
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not m:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    def _filter(f):
        # f is a format dict; m, op and comparison_value come from the
        # enclosing scope
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            # Key missing: the matched '?' text (truthy) if present, else None
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    return _filter
1515
def _default_format_spec(self, info_dict, download=True):
    """Return the format selector used when the 'format' param is unset.

    'best/bestvideo+bestaudio' is chosen for a real (non-simulated)
    download when FFmpegMergerPP reports it cannot merge, when the video is
    live, or when the default output template is '-'.
    Otherwise 'bestvideo+bestaudio/best' is returned if multiple audio
    streams are allowed or 'format-spec' is listed in compat_opts, and
    'bestvideo*+bestaudio/best' in every other case.
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    # The merger is only probed when an actual download will take place
    if not self.params.get('simulate', False) and download:
        if (not can_merge()
                or info_dict.get('is_live', False)
                or self.outtmpl_dict['default'] == '-'):
            return 'best/bestvideo+bestaudio'

    if (self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params.get('compat_opts', [])):
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
0017d9ad 1538
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    format_spec is the user-facing expression, for example
    'bestvideo[height<=720]+bestaudio/best'.  The operators handled are
    ',' (select several), '/' (first alternative that yields anything),
    '+' (merge), '(...)' (grouping) and '[...]' (filter, compiled with
    _build_format_filter).

    Returns a callable that takes a ctx dict with the keys 'formats' (list
    of format dicts) and 'incomplete_formats', and returns an iterable of
    the selected (possibly merged) format dicts.
    Raises SyntaxError when format_spec cannot be parsed; a ValueError
    raised by _build_format_filter for a bad filter propagates unchanged.
    """

    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {
        'audio': self.params.get('allow_multiple_audio_streams', False),
        'video': self.params.get('allow_multiple_video_streams', False),
    }

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect the raw text of a "[...]" filter up to the closing bracket
        filter_parts = []
        for tok_type, string, _, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # The nested loop below must continue where the outer loop stopped,
        # so both have to share one iterator.
        tokens = iter(tokens)
        pending, pending_start, pending_end = None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if pending:
                    yield tokenize.NAME, pending, pending_start, pending_end, None
                    pending = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if pending:
                    yield tokenize.NAME, pending, pending_start, pending_end, None
                    pending = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not pending:
                    pending = string
                    pending_start = start
                    pending_end = end
                else:
                    pending += string
        if pending:
            yield tokenize.NAME, pending, pending_start, pending_end, None

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; returns a list of FormatSelector tuples.
        # The inside_* flags tell a nested call which operators belong to
        # its caller and must be pushed back instead of consumed.
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first format of every stream kind that may not
            # be repeated.  A new list is built on purpose: removing items
            # from formats_info while iterating over it skips the element
            # following each removal.
            seen = {'video': False, 'audio': False}
            kept = []
            for fmt_info in formats_info:
                kinds = [
                    kind for kind in ('audio', 'video')
                    if fmt_info.get(kind[0] + 'codec') != 'none']
                if any(seen[kind] and not allow_multiple_streams[kind] for kind in kinds):
                    continue
                for kind in kinds:
                    seen[kind] = True
                kept.append(fmt_info)
            formats_info = kept

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Lazily yield only the formats for which a test download succeeds
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            paths = self.params.get('paths', {})
            temp_file = os.path.join(
                expand_path(paths.get('home', '').strip()),
                expand_path(paths.get('temp', '').strip()),
                'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
            try:
                success, _ = self.dl(temp_file, f, test=True)
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for fmt in f(ctx):
                        yield fmt
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if check_formats:
                        formats = _check_formats(formats)
                    for f in formats:
                        yield f
            elif spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else None)  # b*, w*
                else:
                    filter_f = ((lambda f: f.get('ext') == spec)
                                if spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    if format_reverse:
                        matches = matches[::-1]
                    if check_formats:
                        matches = list(itertools.islice(_check_formats(matches), format_idx))
                    n = len(matches)
                    if -n <= format_idx - 1 < n:
                        yield matches[format_idx - 1]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a private copy so that the caller's ctx stays untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over the token list that can push back the last token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1867
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers dict to use for info_dict.

    Starts from a copy of std_headers, applies info_dict['http_headers']
    on top, sets 'Cookie' when _calc_cookies() returns a value, and sets
    'X-Forwarded-For' from info_dict['__x_forwarded_for_ip'] unless that
    header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    if 'X-Forwarded-For' in headers:
        return headers
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers
1885
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header that self.cookiejar adds for info_dict['url'].

    A throwaway request for the URL is handed to the cookie jar's
    add_cookie_header(); the result is whatever get_header('Cookie')
    reports afterwards.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1890
@staticmethod
def _sanitize_thumbnails(info_dict):
    """Normalise info_dict['thumbnails'] in place.

    When 'thumbnails' is None but 'thumbnail' is set, a one-element list is
    created from it.  The list is then sorted by (preference, width,
    height, id, url) with missing values ordered first, every URL is passed
    through sanitize_url(), 'resolution' is filled in when both width and
    height are known, and entries without an 'id' get their list index
    as id.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single_thumbnail = info_dict.get('thumbnail')
        if single_thumbnail:
            thumbnails = [{'url': single_thumbnail}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def value_or(thumb, field, fallback):
        value = thumb.get(field)
        return fallback if value is None else value

    def sort_key(thumb):
        return (
            value_or(thumb, 'preference', -1),
            value_or(thumb, 'width', -1),
            value_or(thumb, 'height', -1),
            value_or(thumb, 'id', ''),
            thumb.get('url'))

    thumbnails.sort(key=sort_key)
    for index, thumb in enumerate(thumbnails):
        thumb['url'] = sanitize_url(thumb['url'])
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
1911
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video result, select its formats and process them.

    info_dict is cleaned up in place: field types, thumbnails, dates,
    chapter/season/episode titles, subtitles and every format entry.  The
    requested formats are then chosen with the format selector and, when
    download is true, each of them is passed to process_info().

    Returns None when only listing was requested (the list_thumbnails,
    listsubtitles or listformats params); otherwise returns the info dict,
    updated with the last selected format.
    Raises ExtractorError for a missing 'id'/'title', and for missing or
    unavailable formats unless the ignore_no_formats_error param is set.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is not None and not isinstance(value, compat_str):
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is not None and not isinstance(value, compat_numeric_types):
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # Not part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    main_thumbnail = info_dict.get('thumbnail')
    all_thumbnails = info_dict.get('thumbnails')
    if main_thumbnail:
        info_dict['thumbnail'] = sanitize_url(main_thumbnail)
    elif all_thumbnails:
        # Fall back to the last entry of the (sorted) thumbnail list
        info_dict['thumbnail'] = all_thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    date_sources = (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    )
    for ts_key, date_key in date_sources:
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            moment = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = moment.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        number = info_dict.get('%s_number' % field)
        if number is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), number)

    for cc_kind in ('subtitles', 'automatic_captions'):
        tracks_by_lang = info_dict.get(cc_kind)
        if not tracks_by_lang:
            continue
        for tracks in tracks_by_lang.values():
            for track in tracks:
                if track.get('url'):
                    track['url'] = sanitize_url(track['url'])
                if track.get('ext') is None:
                    track['ext'] = determine_ext(track['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    formats = info_dict.get('formats')
    if formats is None:
        # There's only one format available: the info dict itself
        formats = [info_dict]

    if not formats:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('No video formats found!')
        else:
            raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats if is_wellformed(f)]

    formats_by_id = {}

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(index)
        formats_by_id.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_by_id.items():
        if len(ambiguous_formats) > 1:
            for index, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, index)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff (only after the headers were computed)
    info_dict.pop('__x_forwarded_for_ip', None)

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    all_video_only = all(
        f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
    incomplete_formats = all_video_only or all(
        f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('Requested format is not available')
        else:
            raise ExtractorError('Requested format is not available', expected=True)
    elif download:
        self.to_screen(
            '[info] %s: Downloading %d format(s): %s' % (
                info_dict['id'], len(formats_to_download),
                ", ".join([f['format_id'] for f in formats_to_download])))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            new_info['__original_infodict'] = info_dict
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2150
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each with an 'ext' key).  They are only
    considered when the writesubtitles / writeautomaticsub params are set;
    normal subtitles win over automatic captions of the same language.

    Languages are chosen from the allsubtitles or subtitleslangs params.
    Each subtitleslangs entry is a regular expression matched against the
    available languages; 'all' selects every language and a leading '-'
    removes the matching languages again.  Without either param 'en' is
    used, or else the first available language.  The subtitlesformat param
    is a '/'-separated preference list of extensions ('best' picks the last
    listed format).

    Returns a dict mapping each selected language to the chosen format
    dict, in the order the languages were requested, or None when nothing
    is available.  video_id is only used in warning messages.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # Also covers "neither write option enabled": nothing was collected then
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list (not a set) keeps the order in which languages were requested
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                discard, current_langs = False, all_sub_langs
            else:
                # startswith() instead of lang_re[0]: an empty entry must not raise
                discard = lang_re.startswith('-')
                if discard:
                    lang_re = lang_re[1:]
                matches = re.compile(lang_re + '$').match
                current_langs = [lang for lang in all_sub_langs if matches(lang)]
            if discard:
                requested_langs = [
                    lang for lang in requested_langs if lang not in current_langs]
            else:
                for lang in current_langs:
                    if lang not in requested_langs:
                        requested_langs.append(lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is as unusable as a missing one (formats[-1] would raise)
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # No preferred extension is available: fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2213
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields requested through the force* params to stdout.

    Works on a copy of info_dict, extended with 'filename' (when filename
    is not None) and 'urls' (the URL of every requested format, or the
    single 'url', each followed by its play_path).  The 'forceprint'
    templates are printed first, then title, id, url, thumbnail,
    description, filename, duration and format, and finally the whole dict
    as JSON if 'forcejson' is set.

    When incomplete is true, title/id/url/format are skipped if absent;
    otherwise a missing one raises KeyError.
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename
    requested_formats = info.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in requested_formats)
    elif 'url' in info:
        info['urls'] = info['url'] + info.get('play_path', '')

    def forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        key = field if actual_field is None else actual_field
        if not forced(field):
            return
        if incomplete and info.get(key) is None:
            return
        self.to_stdout(info[key])

    def print_optional(field):
        if forced(field) and info.get(field) is not None:
            self.to_stdout(info[field])

    for tmpl in self.params.get('forceprint', []):
        if re.match(r'\w+$', tmpl):
            # A bare field name is shorthand for the template "%(name)s"
            tmpl = '%({})s'.format(tmpl)
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info)
        self.to_stdout(tmpl % info_copy)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if forced('duration') and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    print_mandatory('format')

    if forced('json'):
        self.post_extract(info)
        self.to_stdout(json.dumps(info, default=repr))
d06daf23 2255
def dl(self, name, info, subtitle=False, test=False):
    """Download info to the file name with a suitable downloader.

    The downloader class is picked by get_suitable_downloader().  With
    test=True a dedicated parameter dict is used instead of self.params
    and no progress hooks are attached.  'http_headers' are computed on a
    copy of info when missing.

    Returns whatever the downloader's download() call returns.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_class = get_suitable_downloader(info, params)
    fd = downloader_class(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

    # Log every requested format URL, or the single URL when there are none
    url_list = [f['url'] for f in info.get('requested_formats', [])] or [info['url']]
    self.write_debug('Invoking downloader on "%s"' % '", "'.join(url_list))

    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2283
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces the ``max_downloads`` limit, applies the match
    filters, prints the forced fields, writes the requested side files
    (description, annotations, subtitles, info JSON, thumbnails, internet
    shortcuts), runs the 'before_dl' pre-processors, downloads the media
    (merging several requested formats when needed), schedules the fixup
    post-processors, runs the post-processors and post hooks, and finally
    records the video in the download archive.

    Most failures are reported through ``report_error``/``report_warning``
    and end the processing of this video by returning early.

    Raises MaxDownloadsReached when the download limit is reached and
    UnavailableVideoError when the download fails with an OS/IO error.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # Any non-None result from the match filter means the entry is skipped
    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    # Maps files written next to the temporary file to their final location
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        # A failed subtitle download only skips that language
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only if writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Skip an existing shortcut only when overwriting is disabled, like
        # every other side file written by this method
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        # Filename without the '.<extension>' suffix
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        # Nothing is downloaded, but the side files still have to be moved
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                """Return an already-downloaded file among filepaths, if any.

                When nothing exists, or overwriting is enabled, the existing
                files are deleted and None is returned.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        # Also look for the file under its post-conversion extension
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    """Tell whether formats can be merged without switching to mkv."""
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    if (info_dict['ext'] == 'webm'
                            and self.params.get('writethumbnail', False)
                            and info_dict.get('thumbnails')):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                def correct_ext(filename):
                    """Return filename with the (possibly changed) merge extension."""
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == old_ext
                        else filename)
                    return '%s.%s' % (filename_wo_ext, info_dict['ext'])

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = (
                    'no-direct-merge' not in self.params.get('compat_opts', [])
                    and info_dict.get('protocol') is not None  # All requested formats have same protocol
                    and not self.params.get('allow_unplayable_formats')
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
                if directly_mergable:
                    info_dict['url'] = requested_formats[0]['url']
                    # Treat it as a single download
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None:
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if dl_filename is None:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info, 'temp'),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            # Without a merge, keep each part as its own file
                            for file in downloaded:
                                files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'
                    and info_dict.get('ext') == 'm4a'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if ('protocol' in info_dict
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2712
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns the accumulated download return code.
    """
    outtmpl = self.outtmpl_dict['default']
    several_urls = len(url_list) > 1
    if several_urls and outtmpl != '-' and '%' not in outtmpl:
        # A template without any field gives every URL the same filename
        if self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        if not self.params.get('dump_single_json', False):
            continue
        self.post_extract(res)
        self.to_stdout(json.dumps(res, default=repr))

    return self._download_retcode
2744
def download_with_info_file(self, info_filename):
    """Process the video described by a previously written info JSON file.

    If processing the stored info fails with DownloadError or
    EntryNotInPlaylist and the info has a ``webpage_url``, that URL is
    downloaded instead; without one, the error is re-raised.
    Returns the accumulated download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        stored_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(stored_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2761
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return a cleaned copy of info_dict, e.g. for writing it as JSON.

    The key '__original_infodict' is always dropped. Lists, tuples and
    sets are returned as lists, and nested dicts are filtered recursively
    with the same rules.

    With actually_filter true, this additionally drops the internal
    'requested_*'/'filepath'/'entries'/'original_url' keys, every key
    starting with an underscore except '_type', and every empty value
    (None, empty dict/list/set/tuple).

    With actually_filter false, nothing else is removed, and the current
    time is stored in info_dict['epoch'] (the passed dict is modified).
    """
    # Always remove this since this may contain a copy of the entire dict
    remove_keys = ['__original_infodict']
    # Always keep this to facilitate load-info-json
    keep_keys = ['_type']
    if actually_filter:
        remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        info_dict['epoch'] = int(time.time())

        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        if isinstance(obj, (list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))

    return filter_fn(info_dict)
cb202fd2 2779
def run_pp(self, pp, infodict):
    """Run the postprocessor pp on infodict and dispose of leftover files.

    The files pp reports as deletable are either queued in
    '__files_to_move' (when 'keepvideo' is set) or removed from disk and
    from that queue. Returns the info dict produced by pp.
    """
    infodict.setdefault('__files_to_move', {})
    leftovers, infodict = pp.run(infodict)
    if not leftovers:
        return infodict

    pending_moves = infodict['__files_to_move']
    if self.params.get('keepvideo', False):
        # Keep the originals: register them without overriding a planned move
        for kept in leftovers:
            pending_moves.setdefault(kept, '')
        return infodict

    for old_filename in set(leftovers):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved afterwards
        pending_moves.pop(old_filename, None)
    return infodict
5bfa4862 2801
@staticmethod
def post_extract(info_dict):
    """Merge the result of the deferred '__post_extractor' into info_dict.

    Playlists and multi-videos are handled by visiting each entry. For any
    other dict, the '__post_extractor' callable (if set) is called, its
    items are added to the dict and to its '__original_infodict', and the
    '__post_extractor' key is removed from both. Works in place.
    """
    def merge_deferred(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            for entry in info.get('entries', {}):
                merge_deferred(entry or {})
            return

        deferred = info.get('__post_extractor')
        if not deferred:
            def deferred():
                return {}
        extra = deferred().items()

        # Apply the same update to the dict and to its stored original
        for target in (info, info.get('__original_infodict') or {}):
            target.update(extra)
            target.pop('__post_extractor', None)

    merge_deferred(info_dict or {})
277d6ff5 2820
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under key on a copy of ie_info.

    Returns a tuple of the resulting info dict (without its
    '__files_to_move' entry) and the files-to-move mapping.
    """
    info = dict(ie_info, __files_to_move=files_to_move or {})
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
5bfa4862 2827
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Works on a copy of ie_info: first the postprocessors attached to the
    info itself and the 'post_process' ones, then the file mover, and last
    the 'after_move' ones. Returns the resulting info dict.
    """
    info = dict(ie_info)
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # Everything has been moved by now, so the queue is dropped
    del info['__files_to_move']

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info
c1c9a79c 2841
def _make_archive_id(self, info_dict):
    """Return the download-archive id '<extractor> <video id>', or None.

    None is returned when the info has no id, or when no extractor key is
    stored and none of the known extractors is suitable for its URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return None

    # 'ie_key' is the key used in a playlist
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return None
        # Try to find matching extractor for the URL and take its ie_key
        matching_ie = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching_ie is None:
            return None
        extractor = matching_ie.ie_key()

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2861
def in_download_archive(self, info_dict):
    """Tell whether the video of info_dict is recorded in the download archive.

    Always False when no archive file is configured or when no archive id
    can be built for the video.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
2872
def record_download_archive(self, info_dict):
    """Append the video of info_dict to the download archive.

    Does nothing when no archive file is configured. Otherwise the archive
    id is appended as one line to that file and added to self.archive.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2882
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short description of the resolution of a format dict.

    'audio only' for formats without video; otherwise the explicit
    'resolution' field if set, else a text built from 'width'/'height',
    and default when neither dimension is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
2898
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-text note describing the format dict fdict.

    The note is assembled from whichever of these fields are present:
    ext, language, format_note, tbr, container, vcodec/vbr, fps,
    acodec/abr, asr and filesize (or filesize_approx).
    """
    def sep(text):
        # Add the ', ' separator only when something already precedes it
        return text + ', ' if text else text

    get = fdict.get
    vbr, abr = get('vbr'), get('abr')

    note = ''
    if get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    language = get('language')
    if language:
        note += (' ' if note else '') + '[%s] ' % language
    if get('format_note') is not None:
        note += get('format_note') + ' '
    if get('tbr') is not None:
        note += '%4dk ' % get('tbr')
    if get('container') is not None:
        note = sep(note) + '%s container' % get('container')

    vcodec = get('vcodec')
    if vcodec is not None and vcodec != 'none':
        note = sep(note) + vcodec
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        # No codec name to attach the bitrate to, so label it explicitly
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr
    if get('fps') is not None:
        note = sep(note) + '%sfps' % get('fps')

    acodec = get('acodec')
    if acodec is not None:
        note = sep(note) + ('video only' if acodec == 'none' else '%-5s' % acodec)
    elif abr is not None:
        note = sep(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % abr
    if get('asr') is not None:
        note += ' (%5dHz)' % get('asr')

    if get('filesize') is not None:
        note = sep(note) + format_bytes(get('filesize'))
    elif get('filesize_approx') is not None:
        note = sep(note) + '~' + format_bytes(get('filesize_approx'))
    return note
91c7271a 2954
def _format_note_table(self, f):
    """Build the NOTE column of the formats table for the format dict f.

    The non-empty parts are joined with ', '.
    """
    parts = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        # The container is ignored when it merely repeats the extension
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join(part for part in parts if part != '')
2965
def list_formats(self, info_dict):
    """Print the available formats of info_dict to the screen.

    A detailed table is used unless the 'list-formats' compat option is
    set or 'list_formats_as_table' is False; in that case a four-column
    listing is printed instead. Formats whose preference is below -1000
    are left out.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('list_formats_as_table', True) is not False)

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    def detailed_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            self._format_note_table(f),
        ]

    def compact_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f),
        ]

    if new_format:
        make_row = detailed_row
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        make_row = compact_row
        header_line = ['format code', 'extension', 'resolution', 'note']
    table = [make_row(f) for f in formats if is_listed(f)]

    rendered = render_table(
        header_line,
        table,
        delim=new_format,
        extraGap=(0 if new_format else 1),
        hideEmpty=new_format)
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], rendered))
cfb56d1a
PH
3011
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails in info_dict via self.to_screen.

    When info_dict has no (or an empty) 'thumbnails' entry, a single
    "No thumbnails present" line is printed instead.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs:
        self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])
        rows = [
            [thumb['id'], thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]
            for thumb in thumbs]
        self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
    else:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
dca08720 3023
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitles via self.to_screen.

    video_id is only used in the printed messages. subtitles maps a
    language code to a list of format dicts, each with an 'ext' and an
    optional 'name'. name is the label used in the messages.

    When subtitles is empty, a single "has no ..." line is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen('Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # 'name' may be missing or explicitly None; both count as unknown,
        # otherwise str.join below would fail on a None entry.
        # A language without any formats yields empty cells instead of
        # failing on unpacking an empty zip().
        entries = [
            (f['ext'], f.get('name') or 'unknown')
            for f in reversed(formats or ())]
        exts = [ext for ext, _ in entries]
        names = [label for _, label in entries]
        if len(set(names)) == 1:
            # All formats share one name: show it once, or nothing if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_screen(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))
a504ced0 3041
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    req may be a URL string, which is first wrapped by sanitized_Request,
    or any object accepted by the opener's open(). The request is opened
    with self._socket_timeout as timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3047
def print_debug_header(self):
    """Write diagnostic details about the running environment.

    Does nothing unless the 'verbose' param is truthy. Otherwise writes, in
    order: the encodings in use, the yt-dlp version and how it is packaged,
    lazy-loading / plugin / compatibility-option notes, the git HEAD (best
    effort), the Python version, external program versions and the proxy
    map. With the 'call_home' param set, the public IP address is fetched
    from yt-dl.org and written as well.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # Describe how the program is being run
    if hasattr(sys, 'frozen'):
        source = '(exe)'
    elif isinstance(globals().get('__loader__'), zipimporter):
        source = '(zip)'
    elif os.path.basename(sys.argv[0]) == '__main__.py':
        source = '(source)'
    else:
        source = ''
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))

    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))

    # Best effort only: any failure to query git is deliberately ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only programs with a detected (truthy) version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    ) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        # Close the response once it is read instead of leaking it
        with contextlib.closing(self.urlopen('https://yt-dl.org/ip')) as response:
            ipaddr = response.read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3137
def _setup_opener(self):
    """Create the URL opener and store it as self._opener.

    Also sets self._socket_timeout (the 'socket_timeout' param as float,
    600 when unset) and self.cookiejar (loaded from the 'cookiefile' param
    when that file is readable). Proxies come from the 'proxy' param: an
    empty string means no proxies, None means the proxies reported by
    getproxies().
    """
    socket_timeout = self.params.get('socket_timeout')
    self._socket_timeout = float(socket_timeout) if socket_timeout is not None else 600

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def _blocked_file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler.file_open = _blocked_file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3190
def encode(self, s):
    """Return s as bytes, encoded with self.get_encoding().

    bytes input is returned unchanged. A UnicodeEncodeError is re-raised
    with a hint about the encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
3200
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3206
def _write_thumbnails(self, info_dict, filename):  # return the extensions
    """Download thumbnails of info_dict next to filename.

    With the 'write_all_thumbnails' param every thumbnail is written, in
    listed order, each with its id as part of the file suffix when there is
    more than one. With only 'writethumbnail' set, the thumbnails are tried
    from the last one backwards and processing stops after the first one
    that was written or was already present. With neither param set,
    nothing is written.

    A thumbnail whose file already exists is not downloaded again when the
    'overwrites' param is falsy. Each freshly written thumbnail dict gets a
    'filepath' entry. Download failures covered by network_exceptions are
    reported as warnings.

    Returns the list of suffixes (optional id plus extension) of the
    thumbnails that were written or already present.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails = []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    ret = []
    for t in thumbnails[::1 if write_all else -1]:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '%s.' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
            # NOTE(review): unlike the download branch, 'filepath' is not
            # recorded here - confirm whether callers rely on it
            ret.append(suffix + thumb_ext)
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # Close the response once copied instead of leaking it
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], error_to_compat_str(err)))
        # Without write_all a single thumbnail is enough
        if ret and not write_all:
            break
    return ret