]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[version] update
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import sys
23import time
67134eab 24import tokenize
8222d8de 25import traceback
75822ca7 26import random
8222d8de 27
961ea474 28from string import ascii_letters
e5813e53 29from zipimport import zipimporter
961ea474 30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
ce02ed60 51 determine_ext,
b5559424 52 determine_protocol,
732044af 53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 56 DownloadError,
c0384f22 57 encode_compat_str,
ce02ed60 58 encodeFilename,
498f5606 59 EntryNotInPlaylist,
a06916d9 60 error_to_compat_str,
8b0d7497 61 ExistingVideoReached,
590bc6f6 62 expand_path,
ce02ed60 63 ExtractorError,
e29663c6 64 float_or_none,
02dbf93f 65 format_bytes,
76d321f6 66 format_field,
752cda38 67 STR_FORMAT_RE,
525ef922 68 formatSeconds,
773f291d 69 GeoRestrictedError,
c9969434 70 int_or_none,
732044af 71 iri_to_uri,
773f291d 72 ISO3166Utils,
56a8fb4f 73 LazyList,
ce02ed60 74 locked_file,
0202b52a 75 make_dir,
dca08720 76 make_HTTPS_handler,
ce02ed60 77 MaxDownloadsReached,
3158150c 78 network_exceptions,
cd6fc19e 79 orderedSet,
a06916d9 80 OUTTMPL_TYPES,
b7ab0590 81 PagedList,
083c9df9 82 parse_filesize,
91410c9b 83 PerRequestProxyHandler,
dca08720 84 platform_name,
eedb7ba5 85 PostProcessingError,
ce02ed60 86 preferredencoding,
eedb7ba5 87 prepend_extension,
a06916d9 88 process_communicate_or_kill,
e8e73840 89 random_uuidv4,
51fb4995 90 register_socks_protocols,
a06916d9 91 RejectedVideoReached,
cfb56d1a 92 render_table,
eedb7ba5 93 replace_extension,
ce02ed60
PH
94 SameFileError,
95 sanitize_filename,
1bb5c511 96 sanitize_path,
dcf77cf1 97 sanitize_url,
67dda517 98 sanitized_Request,
e5660ee6 99 std_headers,
1211bb6d 100 str_or_none,
e29663c6 101 strftime_or_none,
ce02ed60 102 subtitles_filename,
732044af 103 to_high_limit_path,
324ad820 104 traverse_obj,
ce02ed60 105 UnavailableVideoError,
29eb5174 106 url_basename,
58b1f00d 107 version_tuple,
ce02ed60
PH
108 write_json_file,
109 write_string,
1bab3437 110 YoutubeDLCookieJar,
6a3f4c3f 111 YoutubeDLCookieProcessor,
dca08720 112 YoutubeDLHandler,
fca6dba8 113 YoutubeDLRedirectHandler,
ce02ed60 114)
a0e07d31 115from .cache import Cache
52a8a1e1 116from .extractor import (
117 gen_extractor_classes,
118 get_info_extractor,
119 _LAZY_LOADER,
120 _PLUGIN_CLASSES
121)
4c54b89e 122from .extractor.openload import PhantomJSwrapper
52a8a1e1 123from .downloader import (
124 get_suitable_downloader,
125 shorten_protocol_name
126)
4c83c967 127from .downloader.rtmp import rtmpdump_version
4f026faf 128from .postprocessor import (
f17f8651 129 FFmpegFixupM3u8PP,
62cd676c 130 FFmpegFixupM4aPP,
6271f1ca 131 FFmpegFixupStretchedPP,
4f026faf
PH
132 FFmpegMergerPP,
133 FFmpegPostProcessor,
0202b52a 134 # FFmpegSubtitlesConvertorPP,
4f026faf 135 get_postprocessor,
0202b52a 136 MoveFilesAfterDownloadPP,
4f026faf 137)
dca08720 138from .version import __version__
8222d8de 139
e9c0cdd3
YCH
140if compat_os_name == 'nt':
141 import ctypes
142
2459b6e1 143
8222d8de
JMF
144class YoutubeDL(object):
145 """YoutubeDL class.
146
147 YoutubeDL objects are the ones responsible for downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
153
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object handles it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL processes the extracted information, possibly using a File
160 Downloader to download the video.
161
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
168
169 Available options:
170
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
180940e0 173 videopassword: Password for accessing a video.
1da50aa3
S
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
8222d8de
JMF
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
ad8915b7 180 no_warnings: Do not print out anything for warnings.
53c18592 181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
8694c600 189 forcejson: Force printing info_dict as JSON.
63e0be34
PH
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
c25228e5 192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
8222d8de 194 simulate: Do not download the video files.
eb8a4433 195 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 197 ignore_no_formats_error: Ignore "No video formats" error. Useful for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
c25228e5 200 format_sort: How to sort the video formats. see "Sorting Formats"
201 for more details.
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
203 for more details.
204 allow_multiple_video_streams: Allow multiple video streams to be merged
205 into a single file
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
207 into a single file
4524baf0 208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 210 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
212 A string a also accepted for backward compatibility
a820dc72
RA
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
4524baf0 216 windowsfilenames: Force the filenames to be windows compatible
a820dc72 217 ignoreerrors: Do not stop on download errors
7a5c1cfe 218 (Default True when running yt-dlp,
a820dc72 219 but False when directly accessing YoutubeDL class)
26e2805c 220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
d22dec74 222 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
8222d8de
JMF
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
c14e88f0 228 playlist_items: Specific indices of playlist to download.
ff815fe6 229 playlistreverse: Download playlist items in reverse order.
75822ca7 230 playlistrandom: Download playlist items in random order.
8222d8de
JMF
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
8bf9319e 233 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 237 clean_infojson: Remove private fields from the infojson
06167fbb 238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
1fb07d10 240 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 241 writethumbnail: Write the thumbnail image to a file
c25228e5 242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
ec82d85a 244 write_all_thumbnails: Write all thumbnail formats to files
732044af 245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 250 writesubtitles: Write the video subtitles to a file
741dd8ea 251 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 252 allsubtitles: Deprecated - Use subtitlelangs = ['all']
253 Downloads all the subtitles of the video
0b7f3118 254 (requires writesubtitles or writeautomaticsub)
8222d8de 255 listsubtitles: Lists all available subtitles for the video
a504ced0 256 subtitlesformat: The format code for subtitles
c32b0aab 257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
c35f9e72 264 cachedir: Location of the cache files in the filesystem.
a0e07d31 265 False to disable filesystem cache.
47192f92 266 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
275 downloaded.
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
279 Videos already present in the file are not downloaded
280 again.
8a51f564 281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 286 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
a1ee09e8 289 proxy: URL of the proxy server to use
38cce791 290 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 291 on geo-restricted sites.
e344693b 292 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
293 bidi_workaround: Work around buggy terminals without bidirectional text
294 support, using fribidi
a0ddb8a2 295 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 296 include_ads: Download ads as well
04b4d394
PH
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
62fec3b2 299 encoding: Use this encoding instead of the system-specified.
e8ee972c 300 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
301 Pass in 'in_playlist' to only show this behavior for
302 playlist items.
4f026faf 303 postprocessors: A list of dictionaries, each with an entry
71b640cc 304 * key: The name of the postprocessor. See
7a5c1cfe 305 yt_dlp/postprocessor/__init__.py for a list.
56d868db 306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
ab8e5e51
AM
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
71b640cc
PH
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
5cda4eda 314 * status: One of "downloading", "error", or "finished".
ee69b99a 315 Check this first and ignore unknown values.
71b640cc 316
5cda4eda 317 If status is one of "downloading", or "finished", the
ee69b99a
PH
318 following properties may also be present:
319 * filename: The final filename (always present)
5cda4eda 320 * tmpfilename: The filename we're currently writing to
71b640cc
PH
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
323 * total_bytes_estimate: Guess of the eventual file size,
324 None if unavailable.
325 * elapsed: The number of seconds since download started.
71b640cc
PH
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
328 unknown
5cda4eda
PH
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
71b640cc
PH
333
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
45598f15 336 merge_output_format: Extension to use when merging formats.
6b591b29 337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
6271f1ca
PH
340 fixup: Automatically correct known faults of the file.
341 One of:
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
62cd676c 345 about it, warn otherwise (default)
504f20dd 346 source_address: Client-side IP address to bind to.
6ec6cb4e 347 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 348 yt-dlp servers for debugging. (BROKEN)
1cf376f5 349 sleep_interval_requests: Number of seconds to sleep between requests
350 during extraction
7aa589a5
S
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
355 max_sleep_interval.
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
1cf376f5 361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
364 match_filter: A function that gets called with the info_dict of
365 every video.
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
7e5db8c9 369 no_color: Do not emit color codes in output.
0a840f58 370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 371 HTTP header
0a840f58 372 geo_bypass_country:
773f291d
S
373 Two-letter ISO 3166-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
504f20dd 375 X-Forwarded-For HTTP header
5f95927a
S
376 geo_bypass_ip_block:
377 IP range in CIDR notation that will be used similarly to
504f20dd 378 geo_bypass_country
71b640cc 379
85729c51 380 The following options determine which downloader is picked:
52a8a1e1 381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
53ed7066 390 compat_opts: Compatibility options. See "Differences in default behavior".
18e674b4 391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
53ed7066 394 and no-youtube-unavailable-videos works when used via the API
fe7e0c98 395
8222d8de 396 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 397 the downloader (see yt_dlp/downloader/common.py):
8222d8de 398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 401 http_chunk_size.
76b1bd67
JMF
402
403 The following options are used by the post processors:
d4a24f40 404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 405 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
43820c03 408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to be passed to all PP
e409895f 413
414 The following options are used by the extractors:
62bff2c1 415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 417 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 418 discontinuities such as ad breaks (default: False)
3600fd59 419 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
e409895f 423 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
8222d8de
JMF
427 """
428
c9969434
S
429 _NUMERIC_FIELDS = set((
430 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
431 'timestamp', 'upload_year', 'upload_month', 'upload_day',
432 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
433 'average_rating', 'comment_count', 'age_limit',
434 'start_time', 'end_time',
435 'chapter_number', 'season_number', 'episode_number',
436 'track_number', 'disc_number', 'release_year',
437 'playlist_index',
438 ))
439
8222d8de
JMF
# Class-level defaults for the instance state
params = None
_ies = []
# One post-processor chain per stage
_pps = {
    'pre_process': [],
    'before_dl': [],
    'after_move': [],
    'post_process': [],
}
__prepare_filename_warned = False
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
450
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:    Dictionary of options (see the class docstring). None is
               treated as an empty dictionary.
    auto_init: When true, print the debug header and register the default
               info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Choose by truthiness instead of indexing a list with the raw option value,
    # so that None (or any other non-bool value) cannot raise TypeError/IndexError
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
            'Update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the option being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file just means that nothing was recorded yet
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Work on a copy so that the caller's dictionary is left untouched
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        when = pp_def.pop('when', 'post_process')
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
585
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a video ID starting with a dash.

    Such an argument is a dash followed by exactly ten ID characters. The
    warning shows the command line rewritten with those arguments moved
    behind a "--" separator.
    """
    # short YouTube ID starting with dash?
    dash_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects = [pos for pos, arg in enumerate(argv) if dash_id.match(arg)]
    if not suspects:
        return
    kept = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    suggestion = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggestion))
601
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Instances are additionally recorded under their ie_key() and told
    which downloader they belong to; classes are only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 608
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If none is registered
    yet, a new one is created, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
620
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every extractor class produced by gen_extractor_classes to the end of the list
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
627
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor object to the chain of the stage named by when."""
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
632
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
636
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks (currently only for the file downloader)"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 640
1c088fa8 641 def _bidi_workaround(self, message):
5d681e96 642 if not hasattr(self, '_output_channel'):
1c088fa8
PH
643 return message
644
5d681e96 645 assert hasattr(self, '_output_process')
11b85ce6 646 assert isinstance(message, compat_str)
6febd1c1
PH
647 line_count = message.count('\n') + 1
648 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 649 self._output_process.stdin.flush()
6febd1c1 650 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 651 for _ in range(line_count))
6febd1c1 652 return res[:-len('\n')]
1c088fa8 653
def _write_string(self, s, out=None):
    """Hand s to write_string together with the 'encoding' option from params."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 656
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    With a 'logger' in params the message goes to logger.debug() instead.
    A quiet message is only written when 'verbose' is set, and then to the
    error stream rather than the screen stream.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    target = self._err_file if quiet else self._screen_file
    self._write_string(text, target)
8222d8de
JMF
665
def to_stderr(self, message):
    """Print message to stderr

    With a 'logger' in params the message goes to logger.error() instead.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
        return
    logger.error(message)
8222d8de 673
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            # Escape sequence asking the terminal to change its title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 684
bdde425c
PH
def save_console_title(self):
    """Ask the terminal to remember its current title.

    Nothing happens unless 'consoletitle' is set and 'simulate' is not,
    the OS is not 'nt' and the TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False):
        return
    if self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
693
def restore_console_title(self):
    """Ask the terminal to bring back the previously saved title.

    Nothing happens unless 'consoletitle' is set and 'simulate' is not,
    the OS is not 'nt' and the TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False):
        return
    if self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
f89197d7 709
dca08720 710 if self.params.get('cookiefile') is not None:
1bab3437 711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 712
8222d8de
JMF
713 def trouble(self, message=None, tb=None):
714 """Determine action to take when a download problem appears.
715
716 Depending on if the downloader has been configured to ignore
717 download errors or not, this method may throw an exception or
718 not when errors are found, after printing the message.
719
720 tb, if given, is additional traceback information.
721 """
722 if message is not None:
723 self.to_stderr(message)
724 if self.params.get('verbose'):
725 if tb is None:
726 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 727 tb = ''
8222d8de 728 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 729 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 730 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
731 else:
732 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 733 tb = ''.join(tb_data)
c19bc311 734 if tb:
735 self.to_stderr(tb)
8222d8de
JMF
736 if not self.params.get('ignoreerrors', False):
737 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
738 exc_info = sys.exc_info()[1].exc_info
739 else:
740 exc_info = sys.exc_info()
741 raise DownloadError(message, exc_info)
742 self._download_retcode = 1
743
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout, passing along the 'quiet' option"""
    quiet_mode = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet_mode)
748
8222d8de
JMF
749 def report_warning(self, message):
750 '''
751 Print the message to stderr, it will be prefixed with 'WARNING:'
752 If stderr is a tty file the 'WARNING:' will be colored
753 '''
6d07ce01
JMF
754 if self.params.get('logger') is not None:
755 self.params['logger'].warning(message)
8222d8de 756 else:
ad8915b7
PH
757 if self.params.get('no_warnings'):
758 return
e9c0cdd3 759 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6d07ce01
JMF
760 _msg_header = '\033[0;33mWARNING:\033[0m'
761 else:
762 _msg_header = 'WARNING:'
763 warning_message = '%s %s' % (_msg_header, message)
764 self.to_stderr(warning_message)
8222d8de
JMF
765
def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' (in red when stderr is a tty, colors
    are allowed and the OS is not Windows) and hand it to trouble().
    '''
    colorize = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
777
0760b0a7 778 def write_debug(self, message):
779 '''Log debug message or Print message to stderr'''
780 if not self.params.get('verbose', False):
781 return
782 message = '[debug] %s' % message
783 if self.params.get('logger'):
784 self.params['logger'].debug(message)
785 else:
786 self._write_string('%s\n' % message)
787
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded."""
    generic_notice = '[download] The file has already been downloaded'
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The file name could not be encoded for output; omit it
        self.to_screen(generic_notice)
8222d8de 794
def report_file_delete(self, file_name):
    """Tell the user that the existing file file_name is being deleted."""
    generic_notice = 'Deleting existing file'
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The file name could not be encoded for output; omit it
        self.to_screen(generic_notice)
0c3d0f51 801
def parse_outtmpl(self):
    """Return the 'outtmpl' option as a dict with missing/empty entries filled from DEFAULT_OUTTMPL.

    A non-dict 'outtmpl' value is treated as the 'default' template.
    A dict value is updated in place. Bytes templates trigger a warning.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for name, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(name):
            templates[name] = fallback
    for template in templates.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
815
@staticmethod
def validate_outtmpl(tmpl):
    ''' @return None if tmpl can be %-formatted, otherwise the ValueError that was raised '''

    def escape_keyless(mobj):
        # Conversions without a key get an extra '%' prepended
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    try:
        escaped = re.sub(STR_FORMAT_RE.format(''), escape_keyless, tmpl)
        # Trial substitution; any key resolves to 0
        escaped % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
828
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    @param outtmpl    the output template string
    @param info_dict  the info dict; it is copied, not modified
    @param sanitize   optional callable (field_name, value) applied to
                      string-like values; field_name is the last component
                      of the referenced field (e.g. 'id')
    @return           (template, dict) such that `template % dict` yields
                      the final string
    """
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The alternation must be grouped, otherwise "operator + operand" parses
    # as "(operator field) | number". The decimal point must be escaped so
    # that it does not match arbitrary characters.
    MATH_FIELD_RE = r'(?:{field}|{num})'.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # Alternately consume an operator and an operand
            math_func = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_func else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_func is None:
                    math_func = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a literal number; treat the operand as a field
                    offset = float_or_none(get_key(item))
                try:
                    value = math_func(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_func = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default, field_name = None, na, key
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)
            # Name handed to sanitize(): last component of the field, so that
            # checks such as `name == 'id'` see the actual field name
            field_name = mobj['fields'].split('.')[-1]

        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value
        # The format is part of the dict key so that the same field can be
        # used with several different conversions
        key += '\0%s' % fmt

        if fmt == 'c':
            value = compat_str(value)
            if value:
                value = value[0]
            else:
                # An empty string has no first character
                value, fmt = default, 's'
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            if fmt[-1] in 'csr':
                value = sanitize(field_name, value)
        TMPL_DICT[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 949
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type tmpl_type for info_dict.

    Returns the resulting file name, or None (after reporting the error)
    when the template raises a ValueError.
    """
    try:
        restricted = self.params.get('restrictfilenames')

        def sanitize(k, v):
            return sanitize_filename(
                compat_str(v),
                restricted=restricted,
                is_id=(k == 'id' or k.endswith('_id')))

        templates = self.outtmpl_dict
        outtmpl, template_dict = self.prepare_outtmpl(
            templates.get(tmpl_type, templates['default']), info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choice(ascii_letters) for _ in range(32))
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep))
        outtmpl = outtmpl.replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        expanded = expand_path(outtmpl).replace(sep, '')
        filename = expanded % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            pieces = filename.split('.')
            sub_ext = pieces[-2] if len(pieces) > 2 else ''
            kept = [pieces[0][:trim_file_name], sub_ext, pieces[-1]]
            filename = '.'.join(piece for piece in kept if piece)

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
990
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The name comes from _prepare_filename() and is joined onto the 'home'
    path and the dir_type specific path from the 'paths' option.

    '-' (stdout) and empty/None results (template error) are returned as-is.
    With warn=True, a warning is issued once if 'paths' cannot be honoured.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout')
        # filename is None when the template could not be evaluated;
        # os.path.isabs() must not be called on it
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
0202b52a 1020
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded, otherwise the reason for skipping it

    The reason is printed unless silent is set. Depending on the
    'break_on_existing' / 'break_on_reject' options, ExistingVideoReached
    or RejectedVideoReached is raised instead of returning the reason.
    The 'match_filter' option is only consulted when incomplete is false.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    options = self.params

    def check_filter():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = options.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = options.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            allowed_dates = options.get('daterange', DateRange())
            if date not in allowed_dates:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), allowed_dates)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = options.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = options.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), options.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        if incomplete:
            return None
        match_filter = options.get('match_filter')
        if match_filter is None:
            return None
        # The user-supplied filter returns None to accept the video
        return match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if options.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1074
b6c45014
JMF
1075 @staticmethod
1076 def add_extra_info(info_dict, extra_info):
1077 '''Set the keys from extra_info in info dict if they are missing'''
1078 for key, value in extra_info.items():
1079 info_dict.setdefault(key, value)
1080
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Extract information from url using the first suitable extractor.

    Returns whatever the extraction step returns for that extractor.
    Returns None when the video id is already recorded in the download
    archive, or when no extractor is suitable (an error is reported).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        ie_key = candidate.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that videos already in
        # the archive are skipped without extraction
        try:
            if callable(getattr(ie, 'extract_id', None)):
                raw_id = ie.extract_id(url)
            else:
                raw_id = ie._match_id(url)
            temp_id = str_or_none(raw_id)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1129
def __handle_extraction_exceptions(func):
    """Decorator for methods that run extraction code.

    The wrapped method's result is returned unchanged. Exceptions are
    handled in this order:
    - GeoRestrictedError: reported with a hint about countries and proxies
    - ExtractorError: reported together with its formatted traceback
    - MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached:
      re-raised untouched
    - any other Exception: reported if the 'ignoreerrors' option is set,
      re-raised otherwise

    When an error is reported without report_error() raising, the wrapper
    returns None.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                # Translate the country codes into full names for the message
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            # These are control-flow signals and must reach the caller
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
1151
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor ie on url and, if process is set, process the result.

    Returns None when the extractor returns None.
    """
    result = ie.extract(url)
    if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(result, list):
        # Backwards compatibility: old IE result format
        result = {'_type': 'compat_list', 'entries': result}
    self.add_default_extra_info(result, ie, url)
    if not process:
        return result
    return self.process_ie_result(result, download, extra_info)
fe7e0c98 1168
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in extractor and URL related fields of ie_result that are not set yet."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'original_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1177
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Handling depends on ie_result['_type'] (default 'video'):
    video -- processed as a video; 'additional_urls', if present, are
        extracted too and stored under 'additional_entries'
    url -- the referenced URL is extracted
    url_transparent -- like url, but non-None fields of ie_result override
        those of the extracted result
    playlist, multi_video -- entries are processed; a playlist URL that is
        already being processed is skipped and None is returned
    compat_list -- old style list result; every entry is processed

    Raises Exception for any other '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print what was requested, do not resolve
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            self.add_default_extra_info(
                info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            force_properties.pop(f, None)
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized: '%' binds tighter than 'or'
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Entries inherit the extractor/URL fields of the list result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1298
def _ensure_dir_exists(self, path):
    """Delegate to make_dir() for path, with report_error as the error callback, and return its result."""
    on_error = self.report_error
    return make_dir(path, on_error)
1301
def __process_playlist(self, ie_result, download):
    """Process every selected entry of the playlist result ie_result.

    Entries are selected by the 'playlist_items' option, or else by
    'playliststart'/'playlistend'. Playlist level files are written if
    enabled. Each entry is then filtered with _match_entry() and processed.

    Returns ie_result with 'entries' replaced by the processed results.
    Returns None when a playlist infojson or description file cannot be
    prepared or written.

    Raises EntryNotInPlaylist if ie_result has no 'entries', or if a
    requested entry is missing from a result with 'requested_entries'.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Rebuild a list where every entry sits at its playlist position.
            # max() takes the iterable itself: unpacking it fails when
            # there is only a single index
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # spec is a comma separated list of indexes and "start-end" ranges
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                # Only evaluated for its break_on_* side effect here
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    # Without explicit items, the n-th collected entry is at playlist
    # position playliststart + n - 1
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist_index' in self.params.get('compat_options', []):
            # Compat behaviour: index by position in the (re-ordered) list
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1473
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry under the extraction exception handling of the decorator."""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
1478
67134eab
JMF
1479 def _build_format_filter(self, filter_spec):
1480 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1481
1482 OPERATORS = {
1483 '<': operator.lt,
1484 '<=': operator.le,
1485 '>': operator.gt,
1486 '>=': operator.ge,
1487 '=': operator.eq,
1488 '!=': operator.ne,
1489 }
67134eab 1490 operator_rex = re.compile(r'''(?x)\s*
a03a3c80 1491 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
083c9df9
PH
1492 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1493 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1494 $
083c9df9 1495 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1496 m = operator_rex.search(filter_spec)
9ddb6925
S
1497 if m:
1498 try:
1499 comparison_value = int(m.group('value'))
1500 except ValueError:
1501 comparison_value = parse_filesize(m.group('value'))
1502 if comparison_value is None:
1503 comparison_value = parse_filesize(m.group('value') + 'B')
1504 if comparison_value is None:
1505 raise ValueError(
1506 'Invalid value %r in format specification %r' % (
67134eab 1507 m.group('value'), filter_spec))
9ddb6925
S
1508 op = OPERATORS[m.group('op')]
1509
083c9df9 1510 if not m:
9ddb6925
S
1511 STR_OPERATORS = {
1512 '=': operator.eq,
10d33b34
YCH
1513 '^=': lambda attr, value: attr.startswith(value),
1514 '$=': lambda attr, value: attr.endswith(value),
1515 '*=': lambda attr, value: value in attr,
9ddb6925 1516 }
67134eab 1517 str_operator_rex = re.compile(r'''(?x)
f96bff99 1518 \s*(?P<key>[a-zA-Z0-9._-]+)
2cc779f4 1519 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1520 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1521 \s*$
9ddb6925 1522 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1523 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1524 if m:
1525 comparison_value = m.group('value')
2cc779f4
S
1526 str_op = STR_OPERATORS[m.group('op')]
1527 if m.group('negation'):
e118a879 1528 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1529 else:
1530 op = str_op
083c9df9 1531
9ddb6925 1532 if not m:
67134eab 1533 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1534
1535 def _filter(f):
1536 actual_value = f.get(m.group('key'))
1537 if actual_value is None:
1538 return m.group('none_inclusive')
1539 return op(actual_value, comparison_value)
67134eab
JMF
1540 return _filter
1541
0017d9ad 1542 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1543
af0f7428
S
1544 def can_merge():
1545 merger = FFmpegMergerPP(self)
1546 return merger.available and merger.can_merge()
1547
91ebc640 1548 prefer_best = (
1549 not self.params.get('simulate', False)
1550 and download
1551 and (
1552 not can_merge()
19807826 1553 or info_dict.get('is_live', False)
de6000d9 1554 or self.outtmpl_dict['default'] == '-'))
53ed7066 1555 compat = (
1556 prefer_best
1557 or self.params.get('allow_multiple_audio_streams', False)
1558 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1559
1560 return (
53ed7066 1561 'best/bestvideo+bestaudio' if prefer_best
1562 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1563 else 'bestvideo+bestaudio/best')
0017d9ad 1564
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile ``format_spec`` into a function that selects formats.

    The specification is tokenized with the ``tokenize`` module and parsed
    into a tree of ``FormatSelector`` tuples:

    * ``a,b``  - yield the results of every selector
    * ``a/b``  - yield the results of the first selector that yields anything
    * ``a+b``  - merge every pair of results of the two selectors
    * ``(a)``  - grouping
    * ``a[f]`` - restrict the candidate formats with a filter, compiled
      eagerly by ``self._build_format_filter``

    Returns a callable that takes a context dict with the keys ``formats``
    and ``incomplete_formats`` and returns an iterable of selected format
    dicts. Raises ``SyntaxError`` when the specification cannot be parsed.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return them joined
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (this inner loop keeps consuming the same token iterator)
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first format carrying each stream type that may
            # not be duplicated. The kept formats are collected in a new list
            # rather than popped from the list being iterated: popping in
            # place skipped the element after every removal and could pop
            # twice (IndexError) for a format holding both audio and video.
            stream_taken = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                restricted_streams = [
                    kind for kind in ('audio', 'video')
                    if not allow_multiple_streams[kind]
                    and fmt_info.get(kind[0] + 'codec') != 'none']
                if any(stream_taken[kind] for kind in restricted_streams):
                    continue
                for kind in restricted_streams:
                    stream_taken[kind] = True
                kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Yield only the formats for which a test download succeeds
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            paths = self.params.get('paths', {})
            temp_file = os.path.join(
                expand_path(paths.get('home', '').strip()),
                expand_path(paths.get('temp', '').strip()),
                'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
            try:
                dl, _ = self.dl(temp_file, f, test=True)
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
                dl = False
            finally:
                # Never leave the probe file behind, whatever the outcome
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            if dl:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for fmt in f(ctx):
                        yield fmt
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if check_formats:
                        formats = _check_formats(formats)
                    for f in formats:
                        yield f
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = ctx['formats']
                    # Only test-download the formats when checking was
                    # requested, consistent with the other selectors.
                    if check_formats:
                        formats = _check_formats(formats)
                    formats = list(formats)
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else None)  # b*, w*
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    if format_reverse:
                        matches = matches[::-1]
                    if check_formats:
                        # Stop testing as soon as enough working formats were found
                        matches = list(itertools.islice(_check_formats(matches), format_idx))
                    n = len(matches)
                    if -n <= format_idx - 1 < n:
                        yield matches[format_idx - 1]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a private copy so sibling selectors still see every format
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1893
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for ``info_dict``.

    Starts from a copy of ``std_headers``, overlays the ``http_headers`` of
    ``info_dict``, sets ``Cookie`` when ``self._calc_cookies`` returns a
    value, and finally adds ``X-Forwarded-For`` from the private
    ``__x_forwarded_for_ip`` field unless that header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly provided X-Forwarded-For always wins
    if 'X-Forwarded-For' in headers:
        return headers

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers
1911
def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header value for the URL of ``info_dict``.

    A throwaway request for ``info_dict['url']`` is handed to
    ``self.cookiejar.add_cookie_header`` and its ``Cookie`` header is read
    back; the result of ``get_header`` is returned unchanged.
    """
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
e5660ee6 1916
bc516a3f 1917 @staticmethod
1918 def _sanitize_thumbnails(info_dict):
1919 thumbnails = info_dict.get('thumbnails')
1920 if thumbnails is None:
1921 thumbnail = info_dict.get('thumbnail')
1922 if thumbnail:
1923 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1924 if thumbnails:
1925 thumbnails.sort(key=lambda t: (
1926 t.get('preference') if t.get('preference') is not None else -1,
1927 t.get('width') if t.get('width') is not None else -1,
1928 t.get('height') if t.get('height') is not None else -1,
1929 t.get('id') if t.get('id') is not None else '',
1930 t.get('url')))
1931 for i, t in enumerate(thumbnails):
1932 t['url'] = sanitize_url(t['url'])
1933 if t.get('width') and t.get('height'):
1934 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1935 if t.get('id') is None:
1936 t['id'] = '%d' % i
1937
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a video result, select its formats and process them.

    The info dict is normalised in place (field types, thumbnails, dates,
    subtitles, per-format ids/extensions/protocols/headers), the requested
    format specification is evaluated and, when ``download`` is true,
    ``process_info`` is called for every selected format.

    Returns the info dict updated with the last selected format, or ``None``
    when only a listing of thumbnails, subtitles or formats was requested.
    Raises ``ExtractorError`` for a missing ``id``/``title`` and, unless the
    ``ignore_no_formats_error`` param is set, when no format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is not None and not isinstance(value, compat_str):
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is not None and not isinstance(value, compat_numeric_types):
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # Not part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Fall back to the last entry of the thumbnail list
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    date_fields = (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    )
    for ts_key, date_key in date_fields:
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            date_value = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = date_value.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        number_key = '%s_number' % field
        if info_dict.get(number_key) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict[number_key])

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if not cc:
            continue
        for _, subtitle in cc.items():
            for subtitle_format in subtitle:
                if subtitle_format.get('url'):
                    subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                if subtitle_format.get('ext') is None:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # Pick the candidate formats; without a 'formats' list the info dict
    # itself is the only format available
    if info_dict.get('formats') is None:
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('No video formats found!')
        else:
            raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats if is_wellformed(f)]

    formats_by_id = {}

    # Make sure every format has a usable format_id
    for index, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(index)
        formats_by_id.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_by_id.items():
        if len(ambiguous_formats) > 1:
            for index, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, index)

    for fmt in formats:
        if fmt.get('format') is None:
            if fmt.get('format_note') is not None:
                note = ' ({0})'.format(fmt['format_note'])
            else:
                note = ''
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=note,
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    info_dict.pop('__x_forwarded_for_ip', None)

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    all_video_only = all(
        f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
    incomplete_formats = all_video_only or all(
        f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('Requested format is not available')
        else:
            raise ExtractorError('Requested format is not available', expected=True)
    elif download:
        self.to_screen(
            '[info] %s: Downloading %d format(s): %s' % (
                info_dict['id'], len(formats_to_download),
                ", ".join([f['format_id'] for f in formats_to_download])))
        for selected in formats_to_download:
            new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            new_info['__original_infodict'] = info_dict
            new_info.update(selected)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2176
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Normal subtitles are considered when the ``writesubtitles`` param is set
    and automatic captions when ``writeautomaticsub`` is set; a normal
    subtitle takes precedence over an automatic caption of the same language.

    Returns a dict mapping each chosen language to one subtitle format dict,
    or ``None`` when nothing is available or requested.
    """
    write_subs = self.params.get('writesubtitles')
    write_auto = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and write_subs:
        available_subs.update(normal_subtitles)
    if automatic_captions and write_auto:
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if not (write_subs or write_auto) or not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        requested_langs = set()
        for lang_spec in self.params.get('subtitleslangs'):
            if lang_spec == 'all':
                requested_langs.update(all_sub_langs)
                continue
            # A leading '-' removes the matching languages again
            discard = lang_spec[0] == '-'
            pattern = lang_spec[1:] if discard else lang_spec
            matches_lang = re.compile(pattern + '$').match
            matching_langs = [code for code in all_sub_langs if matches_lang(code)]
            if discard:
                requested_langs.difference_update(matching_langs)
            else:
                requested_langs.update(matching_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(all_sub_langs)[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            candidates = [sub for sub in formats if sub['ext'] == ext]
            if candidates:
                chosen = candidates[-1]
                break
        else:
            # No preferred extension matched: use the last format anyway
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2239
d06daf23 2240 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2241 def print_mandatory(field, actual_field=None):
2242 if actual_field is None:
2243 actual_field = field
d06daf23 2244 if (self.params.get('force%s' % field, False)
53c18592 2245 and (not incomplete or info_dict.get(actual_field) is not None)):
2246 self.to_stdout(info_dict[actual_field])
d06daf23
S
2247
2248 def print_optional(field):
2249 if (self.params.get('force%s' % field, False)
2250 and info_dict.get(field) is not None):
2251 self.to_stdout(info_dict[field])
2252
53c18592 2253 info_dict = info_dict.copy()
2254 if filename is not None:
2255 info_dict['filename'] = filename
2256 if info_dict.get('requested_formats') is not None:
2257 # For RTMP URLs, also include the playpath
2258 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2259 elif 'url' in info_dict:
2260 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2261
2262 for tmpl in self.params.get('forceprint', []):
2263 if re.match(r'\w+$', tmpl):
2264 tmpl = '%({})s'.format(tmpl)
2265 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2266 self.to_stdout(tmpl % info_copy)
2267
d06daf23
S
2268 print_mandatory('title')
2269 print_mandatory('id')
53c18592 2270 print_mandatory('url', 'urls')
d06daf23
S
2271 print_optional('thumbnail')
2272 print_optional('description')
53c18592 2273 print_optional('filename')
d06daf23
S
2274 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2275 self.to_stdout(formatSeconds(info_dict['duration']))
2276 print_mandatory('format')
53c18592 2277
d06daf23 2278 if self.params.get('forcejson', False):
277d6ff5 2279 self.post_extract(info_dict)
75d43ca0 2280 self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2281
def dl(self, name, info, subtitle=False, test=False):
    """Run the downloader suited to ``info`` and save the result as ``name``.

    With ``test=True`` a reduced, self-contained parameter set is used
    instead of ``self.params`` and no progress hooks are attached.
    Returns whatever the selected downloader's ``download()`` returns.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader = get_suitable_downloader(info, params)(self, params)
    if not test:
        for hook in self._progress_hooks:
            downloader.add_progress_hook(hook)
        # Fall back to the single URL when no per-format URLs are listed
        format_urls = [fmt['url'] for fmt in info.get('requested_formats', [])]
        if not format_urls:
            format_urls = [info['url']]
        self.write_debug('Invoking downloader on "%s"' % '", "'.join(format_urls))

    # Work on a copy so the computed headers are not written into the caller's dict
    download_info = dict(info)
    if download_info.get('http_headers') is None:
        download_info['http_headers'] = self._calc_headers(download_info)
    return downloader.download(name, download_info, subtitle)
2309
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order, this method: enforces ``max_downloads``; applies the entry
    filter; prints forced fields; stops early in simulate mode; writes the
    requested side files (description, annotations, subtitles, info JSON,
    thumbnails, internet shortcuts); runs the ``before_dl`` processors;
    downloads (and optionally merges) the media unless ``skip_download`` is
    set; schedules fixup post-processors; runs post-processing and post
    hooks; and finally records the entry in the download archive.

    Most failures are reported through ``report_error``/``report_warning``
    followed by an early ``return``.  Raises ``MaxDownloadsReached`` when
    the download limit is hit and ``UnavailableVideoError`` on an
    ``OSError``/``IOError`` during the download itself.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    # Maps a file written next to the temp file to its final location
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        # A failed subtitle download is not fatal for the video
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only if writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Keep an existing shortcut only when overwriting is disabled, like
        # every other side file written by this method.
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        # Nothing is downloaded, but the side files still have to be moved
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                """Return an already-downloaded candidate, or None.

                When overwriting is enabled the existing candidates are
                deleted and None is returned so a download takes place.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    if (info_dict['ext'] == 'webm'
                            and self.params.get('writethumbnail', False)
                            and info_dict.get('thumbnails')):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                def correct_ext(filename):
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == old_ext
                        else filename)
                    return '%s.%s' % (filename_wo_ext, info_dict['ext'])

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = (
                    'no-direct-merge' not in self.params.get('compat_opts', [])
                    and info_dict.get('protocol') is not None  # All requested formats have same protocol
                    and not self.params.get('allow_unplayable_formats')
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
                if directly_mergable:
                    info_dict['url'] = requested_formats[0]['url']
                    # Treat it as a single download
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None:
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if dl_filename is None:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info, 'temp'),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            for file in downloaded:
                                files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'
                    and info_dict.get('ext') == 'm4a'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if ('protocol' in info_dict
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    # Re-check the limit now that this entry has been counted
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2738
def download(self, url_list):
    """Download a given list of URLs.

    Raises ``SameFileError`` when several URLs would all be written to one
    fixed output name.  Returns ``self._download_retcode``.
    """
    template = self.outtmpl_dict['default']
    if len(url_list) > 1:
        # A template without any "%" field names the same file for every URL
        is_fixed_name = template != '-' and '%' not in template
        if is_fixed_name and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            if not self.params.get('dump_single_json', False):
                continue
            self.post_extract(result)
            self.to_stdout(json.dumps(result, default=repr))

    return self._download_retcode
2770
def download_with_info_file(self, info_filename):
    """Process the entry stored in a previously written info JSON file.

    If processing the stored info fails with ``DownloadError`` or
    ``EntryNotInPlaylist`` and the info carries a ``webpage_url``, that URL
    is downloaded from scratch instead; without one the error is re-raised.
    Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        stored_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(stored_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 2787
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return a copy of ``info_dict`` cleaned up for writing/loading as JSON.

    ``__original_infodict`` is always dropped.  With ``actually_filter``
    true, keys starting with ``_`` (except ``_type``), the per-download
    bookkeeping keys and empty values are dropped as well.  With
    ``actually_filter`` false, nothing else is removed, but an ``epoch``
    timestamp is written into the *input* dict before it is copied.

    The filtering is applied recursively to nested dicts; lists, tuples
    and sets are returned as lists.
    """
    # Always remove this since this may contain a copy of the entire dict
    remove_keys = ['__original_infodict']
    # Always keep this to facilitate load-info-json.  This must be a flat
    # list of key names: a stray trailing comma here would wrap it in a
    # tuple and make every "k not in keep_keys" test true.
    keep_keys = ['_type']
    if actually_filter:
        remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        info_dict['epoch'] = int(time.time())

        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        if isinstance(obj, (list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))

    return filter_fn(info_dict)
cb202fd2 2805
def run_pp(self, pp, infodict):
    """Run one postprocessor and deal with the files it wants deleted.

    ``pp.run`` returns ``(files_to_delete, infodict)``.  With the
    ``keepvideo`` param set, those files are registered in
    ``__files_to_move`` (with an empty target unless already registered)
    instead of being removed; otherwise they are deleted from disk and
    dropped from ``__files_to_move``.  Returns the info dict from ``pp.run``.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    pending_moves = infodict['__files_to_move']
    if self.params.get('keepvideo', False):
        for kept in obsolete_files:
            pending_moves.setdefault(kept, '')
        return infodict

    for old_filename in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved afterwards
        pending_moves.pop(old_filename, None)
    return infodict
5bfa4862 2827
@staticmethod
def post_extract(info_dict):
    """Merge the output of any deferred ``__post_extractor`` into the info.

    Playlists and multi-videos are walked recursively through their
    ``entries``.  For a single entry, the callable stored under
    ``__post_extractor`` (if any) is invoked, its result is merged into the
    entry and into its ``__original_infodict``, and the callable is removed
    from both.  All changes are made in place.
    """
    def resolve(entry):
        if entry.get('_type') in ('playlist', 'multi_video'):
            for child in entry.get('entries', {}):
                resolve(child or {})
            return

        deferred = entry.get('__post_extractor')
        if not deferred:
            deferred = lambda: {}
        extra_fields = deferred().items()

        # The pristine copy of the entry receives the same extra fields
        for target in (entry, entry.get('__original_infodict') or {}):
            target.update(extra_fields)
            target.pop('__post_extractor', None)

    resolve(info_dict or {})
277d6ff5 2846
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the processors registered under ``key`` on a copy of ``ie_info``.

    Returns ``(info, files_to_move)``; the ``__files_to_move`` entry is
    removed from the returned info dict.
    """
    result = dict(ie_info)
    result['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        result = self.run_pp(processor, result)
    pending_moves = result.pop('__files_to_move', None)
    return result, pending_moves
5bfa4862 2853
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    The entry's own ``__postprocessors`` run first, then the registered
    ``post_process`` ones, then the files are moved into place, and finally
    the ``after_move`` processors run.  Returns the resulting info dict,
    without its ``__files_to_move`` entry.
    """
    result = dict(ie_info)
    result.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        result = self.run_pp(processor, result)

    result = self.run_pp(MoveFilesAfterDownloadPP(self), result)
    del result['__files_to_move']

    for processor in self._pps['after_move']:
        result = self.run_pp(processor, result)
    return result
c1c9a79c 2867
def _make_archive_id(self, info_dict):
    """Build the download-archive key for an entry: ``"<extractor> <id>"``.

    Returns None when the entry has no id, or when no extractor is named
    in it and none of ``self._ies`` is suitable for its URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return None

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return None
        # Try to find matching extractor for the URL and take its ie_key
        for candidate in self._ies:
            if candidate.suitable(url):
                break
        else:
            return None
        extractor = candidate.ie_key()
    return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2887
def in_download_archive(self, info_dict):
    """Tell whether the entry is already listed in the download archive.

    Always False when no ``download_archive`` param is set or when no
    archive id can be built for the entry.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A falsy id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
2898
def record_download_archive(self, info_dict):
    """Append the entry's archive id to the archive file and in-memory set.

    Does nothing when no ``download_archive`` param is set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2908
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution for a format dict.

    Precedence: ``'audio only'`` for formats without video, then an
    explicit ``resolution`` value, then a string built from ``width`` and
    ``height``, and finally ``default``.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return format['resolution']

    has_width = bool(format.get('width'))
    has_height = bool(format.get('height'))
    if has_width and has_height:
        return '%dx%d' % (format['width'], format['height'])
    if has_height:
        return '%sp' % format['height']
    if has_width:
        return '%dx?' % format['width']
    return default
2924
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-text "note" column for one format dict.

    The note is assembled left to right from whichever of these fields are
    present: unsupported-extension marker, language, format note, total
    bitrate, container, video codec/bitrate, fps, audio codec/bitrate,
    sample rate and (approximate) file size.
    """
    def with_comma(text):
        # Separate from what is already there, but never start with a comma
        return text + ', ' if text else text

    get = fdict.get
    note = ''
    if get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if get('language'):
        if note:
            note += ' '
        note += '[%s] ' % fdict['language']
    if get('format_note') is not None:
        note += fdict['format_note'] + ' '
    if get('tbr') is not None:
        note += '%4dk ' % fdict['tbr']
    if get('container') is not None:
        note = with_comma(note) + '%s container' % fdict['container']

    vcodec, vbr, abr = get('vcodec'), get('vbr'), get('abr')
    if vcodec is not None and vcodec != 'none':
        note = with_comma(note) + fdict['vcodec']
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % fdict['vbr']

    if get('fps') is not None:
        note = with_comma(note) + '%sfps' % fdict['fps']

    if get('acodec') is not None:
        note = with_comma(note)
        if fdict['acodec'] == 'none':
            note += 'video only'
        else:
            note += '%-5s' % fdict['acodec']
    elif abr is not None:
        note = with_comma(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % fdict['abr']
    if get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if get('filesize') is not None:
        note = with_comma(note) + format_bytes(fdict['filesize'])
    elif get('filesize_approx') is not None:
        note = with_comma(note) + '~' + format_bytes(fdict['filesize_approx'])
    return note
91c7271a 2980
def _format_note_table(self, f):
    """Build the NOTE cell of the tabular format listing for format ``f``.

    The non-empty pieces are joined with ``', '``.
    """
    pieces = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        # The container is passed as "ignore" when it equals the extension
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join(piece for piece in pieces if piece != '')
2991
def list_formats(self, info_dict):
    """Print the table of formats available for ``info_dict``.

    The detailed tabular layout is used unless the ``list-formats`` compat
    option is set or ``list_formats_as_table`` is False.  Formats whose
    ``preference`` is below -1000 are left out.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('list_formats_as_table', True) is not False)

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    if new_format:
        def make_row(f):
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]

        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        def make_row(f):
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]

        header_line = ['format code', 'extension', 'resolution', 'note']

    table = [make_row(f) for f in formats if is_listed(f)]

    rendered = render_table(
        header_line,
        table,
        delim=new_format,
        extraGap=(0 if new_format else 1),
        hideEmpty=new_format)
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], rendered))
cfb56d1a
PH
3037
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the thumbnails in `info_dict`.

    Prints a "No thumbnails present" notice and returns when `info_dict`
    has no (or an empty) 'thumbnails' entry.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    def _dimension(thumb, key):
        # dict.get's default only applies to a missing key; a key that is
        # present with the value None must also be reported as 'unknown'
        value = thumb.get(key)
        return 'unknown' if value is None else value

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], _dimension(t, 'width'), _dimension(t, 'height'), t['url']] for t in thumbnails]))
dca08720 3049
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the subtitle languages available for `video_id`.

    `subtitles` maps a language code to a list of format dicts, each with an
    'ext' key and an optional 'name'. `name` is the label used in the
    printed messages. Prints a "has no ..." notice and returns when
    `subtitles` is empty.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Two separate lists instead of unpacking zip(*...): unpacking
        # raises ValueError when a language has no formats at all
        ordered = list(reversed(formats or ()))
        exts = [f['ext'] for f in ordered]
        names = [f.get('name', 'unknown') for f in ordered]
        if len(set(names)) == 1:
            # All formats share one name: show it once, or nothing if unnamed
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_screen(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))
a504ced0 3067
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped in a request object first
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3073
def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless the 'verbose' param is set.

    In order, reports: encodings, yt-dlp version and how it is being run,
    lazy-loading / plugin / compat-option status, the git HEAD (best effort),
    Python version and platform, external executable versions, the proxy map
    and, with the 'call_home' param, the public IP address.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # Falls back to a 'missing (<type name>)' marker when sys.stdout has no
    # 'encoding' attribute
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # Tag describing how the program is being run: frozen executable,
    # imported through zipimporter, or started via __main__.py
    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    # Best effort: report the short git HEAD of the directory containing this
    # file; any failure (e.g. git missing, not a checkout) is silently ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # sys.exc_clear() is a Python 2 API; the inner handler covers
        # interpreters where it does not exist
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Implementation name, with the PyPy version appended when available
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables with a truthy version are listed, sorted by name
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the 'proxies' mapping of every opener handler that has one
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        return
        # NOTE: the unconditional return above makes the version check below
        # unreachable; the code is currently dead
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3163
def _setup_opener(self):
    """Create the URL opener, cookie jar and socket timeout from `self.params`.

    Sets `self._socket_timeout` (600 when the 'socket_timeout' param is
    None), `self.cookiejar` (loaded from the 'cookiefile' param when that
    file is readable) and `self._opener`.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    cookie_path = self.params.get('cookiefile')
    requested_proxy = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if requested_proxy is None:
        # No explicit proxy param: use the proxies reported by getproxies()
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif requested_proxy == '':
        # An empty string yields an empty proxy mapping
        proxies = {}
    else:
        proxies = {'http': requested_proxy, 'https': requested_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = int(bool(self.params.get('debug_printtraffic')))
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def _refuse_file_url(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = _refuse_file_url

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3216
def encode(self, s):
    """Return `s` as bytes, encoding text with `self.get_encoding()`.

    Bytes input is returned unchanged. A UnicodeEncodeError is re-raised
    with a hint about the encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Bytes input falls through already encoded
3226
def get_encoding(self):
    """Return the 'encoding' param, or `preferredencoding()` when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3232
de6000d9 3233 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3234 write_all = self.params.get('write_all_thumbnails', False)
3235 thumbnails = []
3236 if write_all or self.params.get('writethumbnail', False):
0202b52a 3237 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3238 multiple = write_all and len(thumbnails) > 1
ec82d85a 3239
0202b52a 3240 ret = []
6c4fd172 3241 for t in thumbnails[::1 if write_all else -1]:
ec82d85a 3242 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3243 suffix = '%s.' % t['id'] if multiple else ''
3244 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3245 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3246
0c3d0f51 3247 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3248 ret.append(suffix + thumb_ext)
ec82d85a
PH
3249 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3250 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3251 else:
5ef7d9bd 3252 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3253 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3254 try:
3255 uf = self.urlopen(t['url'])
d3d89c32 3256 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3257 shutil.copyfileobj(uf, thumbf)
de6000d9 3258 ret.append(suffix + thumb_ext)
ec82d85a
PH
3259 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3260 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3261 t['filepath'] = thumb_filename
3158150c 3262 except network_exceptions as err:
ec82d85a 3263 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3264 (t['url'], error_to_compat_str(err)))
6c4fd172 3265 if ret and not write_all:
3266 break
0202b52a 3267 return ret