]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[utils] Add `__getitem__` for `PagedList`
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import sys
23import time
67134eab 24import tokenize
8222d8de 25import traceback
75822ca7 26import random
8222d8de 27
961ea474 28from string import ascii_letters
e5813e53 29from zipimport import zipimporter
961ea474 30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
ce02ed60 51 determine_ext,
b5559424 52 determine_protocol,
732044af 53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 56 DownloadError,
c0384f22 57 encode_compat_str,
ce02ed60 58 encodeFilename,
498f5606 59 EntryNotInPlaylist,
a06916d9 60 error_to_compat_str,
8b0d7497 61 ExistingVideoReached,
590bc6f6 62 expand_path,
ce02ed60 63 ExtractorError,
e29663c6 64 float_or_none,
02dbf93f 65 format_bytes,
76d321f6 66 format_field,
143db31d 67 FORMAT_RE,
525ef922 68 formatSeconds,
773f291d 69 GeoRestrictedError,
c9969434 70 int_or_none,
732044af 71 iri_to_uri,
773f291d 72 ISO3166Utils,
ce02ed60 73 locked_file,
0202b52a 74 make_dir,
dca08720 75 make_HTTPS_handler,
ce02ed60 76 MaxDownloadsReached,
3158150c 77 network_exceptions,
cd6fc19e 78 orderedSet,
a06916d9 79 OUTTMPL_TYPES,
b7ab0590 80 PagedList,
083c9df9 81 parse_filesize,
91410c9b 82 PerRequestProxyHandler,
dca08720 83 platform_name,
eedb7ba5 84 PostProcessingError,
ce02ed60 85 preferredencoding,
eedb7ba5 86 prepend_extension,
a06916d9 87 process_communicate_or_kill,
e8e73840 88 random_uuidv4,
51fb4995 89 register_socks_protocols,
a06916d9 90 RejectedVideoReached,
cfb56d1a 91 render_table,
eedb7ba5 92 replace_extension,
ce02ed60
PH
93 SameFileError,
94 sanitize_filename,
1bb5c511 95 sanitize_path,
dcf77cf1 96 sanitize_url,
67dda517 97 sanitized_Request,
e5660ee6 98 std_headers,
1211bb6d 99 str_or_none,
e29663c6 100 strftime_or_none,
ce02ed60 101 subtitles_filename,
732044af 102 to_high_limit_path,
a439a3a4 103 traverse_dict,
ce02ed60 104 UnavailableVideoError,
29eb5174 105 url_basename,
58b1f00d 106 version_tuple,
ce02ed60
PH
107 write_json_file,
108 write_string,
1bab3437 109 YoutubeDLCookieJar,
6a3f4c3f 110 YoutubeDLCookieProcessor,
dca08720 111 YoutubeDLHandler,
fca6dba8 112 YoutubeDLRedirectHandler,
ce02ed60 113)
a0e07d31 114from .cache import Cache
52a8a1e1 115from .extractor import (
116 gen_extractor_classes,
117 get_info_extractor,
118 _LAZY_LOADER,
119 _PLUGIN_CLASSES
120)
4c54b89e 121from .extractor.openload import PhantomJSwrapper
52a8a1e1 122from .downloader import (
123 get_suitable_downloader,
124 shorten_protocol_name
125)
4c83c967 126from .downloader.rtmp import rtmpdump_version
4f026faf 127from .postprocessor import (
f17f8651 128 FFmpegFixupM3u8PP,
62cd676c 129 FFmpegFixupM4aPP,
6271f1ca 130 FFmpegFixupStretchedPP,
4f026faf
PH
131 FFmpegMergerPP,
132 FFmpegPostProcessor,
0202b52a 133 # FFmpegSubtitlesConvertorPP,
4f026faf 134 get_postprocessor,
0202b52a 135 MoveFilesAfterDownloadPP,
4f026faf 136)
dca08720 137from .version import __version__
8222d8de 138
e9c0cdd3
YCH
139if compat_os_name == 'nt':
140 import ctypes
141
2459b6e1 142
8222d8de
JMF
143class YoutubeDL(object):
144 """YoutubeDL class.
145
146 YoutubeDL objects are the ones responsible of downloading the
147 actual video file and writing it to disk if the user has requested
148 it, among some other tasks. In most cases there should be one per
149 program. As, given a video URL, the downloader doesn't know how to
150 extract all the needed information, task that InfoExtractors do, it
151 has to pass the URL to one of them.
152
153 For this, YoutubeDL objects have a method that allows
154 InfoExtractors to be registered in a given order. When it is passed
155 a URL, the YoutubeDL object handles it to the first InfoExtractor it
156 finds that reports being able to handle it. The InfoExtractor extracts
157 all the information about the video or videos the URL refers to, and
158 YoutubeDL process the extracted information, possibly using a File
159 Downloader to download the video.
160
161 YoutubeDL objects accept a lot of parameters. In order not to saturate
162 the object constructor with arguments, it receives a dictionary of
163 options instead. These options are available through the params
164 attribute for the InfoExtractors to use. The YoutubeDL also
165 registers itself as the downloader in charge for the InfoExtractors
166 that are added to it, so this is a "mutual registration".
167
168 Available options:
169
170 username: Username for authentication purposes.
171 password: Password for authentication purposes.
180940e0 172 videopassword: Password for accessing a video.
1da50aa3
S
173 ap_mso: Adobe Pass multiple-system operator identifier.
174 ap_username: Multiple-system operator account username.
175 ap_password: Multiple-system operator account password.
8222d8de
JMF
176 usenetrc: Use netrc for authentication instead.
177 verbose: Print additional info to stdout.
178 quiet: Do not print messages to stdout.
ad8915b7 179 no_warnings: Do not print out anything for warnings.
53c18592 180 forceprint: A list of templates to force print
181 forceurl: Force printing final URL. (Deprecated)
182 forcetitle: Force printing title. (Deprecated)
183 forceid: Force printing ID. (Deprecated)
184 forcethumbnail: Force printing thumbnail URL. (Deprecated)
185 forcedescription: Force printing description. (Deprecated)
186 forcefilename: Force printing final filename. (Deprecated)
187 forceduration: Force printing duration. (Deprecated)
8694c600 188 forcejson: Force printing info_dict as JSON.
63e0be34
PH
189 dump_single_json: Force printing the info_dict of the whole playlist
190 (or video) as a single JSON line.
c25228e5 191 force_write_download_archive: Force writing download archive regardless
192 of 'skip_download' or 'simulate'.
8222d8de 193 simulate: Do not download the video files.
eb8a4433 194 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 195 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 196 ignore_no_formats_error: Ignore "No video formats" error. Usefull for
197 extracting metadata even if the video is not actually
198 available for download (experimental)
c25228e5 199 format_sort: How to sort the video formats. see "Sorting Formats"
200 for more details.
201 format_sort_force: Force the given format_sort. see "Sorting Formats"
202 for more details.
203 allow_multiple_video_streams: Allow multiple video streams to be merged
204 into a single file
205 allow_multiple_audio_streams: Allow multiple audio streams to be merged
206 into a single file
4524baf0 207 paths: Dictionary of output paths. The allowed keys are 'home'
208 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 209 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 210 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
211 A string a also accepted for backward compatibility
a820dc72
RA
212 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
213 restrictfilenames: Do not allow "&" and spaces in file names
214 trim_file_name: Limit length of filename (extension excluded)
4524baf0 215 windowsfilenames: Force the filenames to be windows compatible
a820dc72 216 ignoreerrors: Do not stop on download errors
7a5c1cfe 217 (Default True when running yt-dlp,
a820dc72 218 but False when directly accessing YoutubeDL class)
26e2805c 219 skip_playlist_after_errors: Number of allowed failures until the rest of
220 the playlist is skipped
d22dec74 221 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 222 overwrites: Overwrite all video and metadata files if True,
223 overwrite only non-video files if None
224 and don't overwrite any file if False
8222d8de
JMF
225 playliststart: Playlist item to start at.
226 playlistend: Playlist item to end at.
c14e88f0 227 playlist_items: Specific indices of playlist to download.
ff815fe6 228 playlistreverse: Download playlist items in reverse order.
75822ca7 229 playlistrandom: Download playlist items in random order.
8222d8de
JMF
230 matchtitle: Download only matching titles.
231 rejecttitle: Reject downloads for matching titles.
8bf9319e 232 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
233 logtostderr: Log messages to stderr instead of stdout.
234 writedescription: Write the video description to a .description file
235 writeinfojson: Write the video description to a .info.json file
75d43ca0 236 clean_infojson: Remove private fields from the infojson
06167fbb 237 writecomments: Extract video comments. This will not be written to disk
238 unless writeinfojson is also given
1fb07d10 239 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 240 writethumbnail: Write the thumbnail image to a file
c25228e5 241 allow_playlist_files: Whether to write playlists' description, infojson etc
242 also to disk when using the 'write*' options
ec82d85a 243 write_all_thumbnails: Write all thumbnail formats to files
732044af 244 writelink: Write an internet shortcut file, depending on the
245 current platform (.url/.webloc/.desktop)
246 writeurllink: Write a Windows internet shortcut file (.url)
247 writewebloclink: Write a macOS internet shortcut file (.webloc)
248 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 249 writesubtitles: Write the video subtitles to a file
741dd8ea 250 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 251 allsubtitles: Deprecated - Use subtitlelangs = ['all']
252 Downloads all the subtitles of the video
0b7f3118 253 (requires writesubtitles or writeautomaticsub)
8222d8de 254 listsubtitles: Lists all available subtitles for the video
a504ced0 255 subtitlesformat: The format code for subtitles
c32b0aab 256 subtitleslangs: List of languages of the subtitles to download (can be regex).
257 The list may contain "all" to refer to all the available
258 subtitles. The language can be prefixed with a "-" to
259 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
260 keepvideo: Keep the video file after post-processing
261 daterange: A DateRange object, download only if the upload_date is in the range.
262 skip_download: Skip the actual download of the video file
c35f9e72 263 cachedir: Location of the cache files in the filesystem.
a0e07d31 264 False to disable filesystem cache.
47192f92 265 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
266 age_limit: An integer representing the user's age in years.
267 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
268 min_views: An integer representing the minimum view count the video
269 must have in order to not be skipped.
270 Videos without view count information are always
271 downloaded. None for no limit.
272 max_views: An integer representing the maximum view count.
273 Videos that are more popular than that are not
274 downloaded.
275 Videos without view count information are always
276 downloaded. None for no limit.
277 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
278 Videos already present in the file are not downloaded
279 again.
8a51f564 280 break_on_existing: Stop the download process after attempting to download a
281 file that is in the archive.
282 break_on_reject: Stop the download process when encountering a video that
283 has been filtered out.
284 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 285 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
286 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
287 At the moment, this is only supported by YouTube.
a1ee09e8 288 proxy: URL of the proxy server to use
38cce791 289 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 290 on geo-restricted sites.
e344693b 291 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
292 bidi_workaround: Work around buggy terminals without bidirectional text
293 support, using fridibi
a0ddb8a2 294 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 295 include_ads: Download ads as well
04b4d394
PH
296 default_search: Prepend this string if an input url is not valid.
297 'auto' for elaborate guessing
62fec3b2 298 encoding: Use this encoding instead of the system-specified.
e8ee972c 299 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
300 Pass in 'in_playlist' to only show this behavior for
301 playlist items.
4f026faf 302 postprocessors: A list of dictionaries, each with an entry
71b640cc 303 * key: The name of the postprocessor. See
7a5c1cfe 304 yt_dlp/postprocessor/__init__.py for a list.
56d868db 305 * when: When to run the postprocessor. Can be one of
306 pre_process|before_dl|post_process|after_move.
307 Assumed to be 'post_process' if not given
ab8e5e51
AM
308 post_hooks: A list of functions that get called as the final step
309 for each video file, after all postprocessors have been
310 called. The filename will be passed as the only argument.
71b640cc
PH
311 progress_hooks: A list of functions that get called on download
312 progress, with a dictionary with the entries
5cda4eda 313 * status: One of "downloading", "error", or "finished".
ee69b99a 314 Check this first and ignore unknown values.
71b640cc 315
5cda4eda 316 If status is one of "downloading", or "finished", the
ee69b99a
PH
317 following properties may also be present:
318 * filename: The final filename (always present)
5cda4eda 319 * tmpfilename: The filename we're currently writing to
71b640cc
PH
320 * downloaded_bytes: Bytes on disk
321 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
322 * total_bytes_estimate: Guess of the eventual file size,
323 None if unavailable.
324 * elapsed: The number of seconds since download started.
71b640cc
PH
325 * eta: The estimated time in seconds, None if unknown
326 * speed: The download speed in bytes/second, None if
327 unknown
5cda4eda
PH
328 * fragment_index: The counter of the currently
329 downloaded video fragment.
330 * fragment_count: The number of fragments (= individual
331 files that will be merged)
71b640cc
PH
332
333 Progress hooks are guaranteed to be called at least once
334 (with status "finished") if the download is successful.
45598f15 335 merge_output_format: Extension to use when merging formats.
6b591b29 336 final_ext: Expected final extension; used to detect when the file was
337 already downloaded and converted. "merge_output_format" is
338 replaced by this extension when given
6271f1ca
PH
339 fixup: Automatically correct known faults of the file.
340 One of:
341 - "never": do nothing
342 - "warn": only emit a warning
343 - "detect_or_warn": check whether we can do anything
62cd676c 344 about it, warn otherwise (default)
504f20dd 345 source_address: Client-side IP address to bind to.
6ec6cb4e 346 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 347 yt-dlp servers for debugging. (BROKEN)
1cf376f5 348 sleep_interval_requests: Number of seconds to sleep between requests
349 during extraction
7aa589a5
S
350 sleep_interval: Number of seconds to sleep before each download when
351 used alone or a lower bound of a range for randomized
352 sleep before each download (minimum possible number
353 of seconds to sleep) when used along with
354 max_sleep_interval.
355 max_sleep_interval:Upper bound of a range for randomized sleep before each
356 download (maximum possible number of seconds to sleep).
357 Must only be used along with sleep_interval.
358 Actual sleep time will be a random float from range
359 [sleep_interval; max_sleep_interval].
1cf376f5 360 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
361 listformats: Print an overview of available video formats and exit.
362 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
363 match_filter: A function that gets called with the info_dict of
364 every video.
365 If it returns a message, the video is ignored.
366 If it returns None, the video is downloaded.
367 match_filter_func in utils.py is one example for this.
7e5db8c9 368 no_color: Do not emit color codes in output.
0a840f58 369 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 370 HTTP header
0a840f58 371 geo_bypass_country:
773f291d
S
372 Two-letter ISO 3166-2 country code that will be used for
373 explicit geographic restriction bypassing via faking
504f20dd 374 X-Forwarded-For HTTP header
5f95927a
S
375 geo_bypass_ip_block:
376 IP range in CIDR notation that will be used similarly to
504f20dd 377 geo_bypass_country
71b640cc 378
85729c51 379 The following options determine which downloader is picked:
52a8a1e1 380 external_downloader: A dictionary of protocol keys and the executable of the
381 external downloader to use for it. The allowed protocols
382 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
383 Set the value to 'native' to use the native downloader
384 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
385 or {'m3u8': 'ffmpeg'} instead.
386 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
387 if True, otherwise use ffmpeg/avconv if False, otherwise
388 use downloader suggested by extractor if None.
53ed7066 389 compat_opts: Compatibility options. See "Differences in default behavior".
18e674b4 390 Note that only format-sort, format-spec, no-live-chat,
391 no-attach-info-json, playlist-index, list-formats,
392 no-direct-merge, no-youtube-channel-redirect,
53ed7066 393 and no-youtube-unavailable-videos works when used via the API
fe7e0c98 394
8222d8de 395 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 396 the downloader (see yt_dlp/downloader/common.py):
8222d8de 397 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 398 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 399 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 400 http_chunk_size.
76b1bd67
JMF
401
402 The following options are used by the post processors:
d4a24f40 403 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 404 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
405 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
406 to the binary or its containing directory.
43820c03 407 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
408 and a list of additional command-line arguments for the
409 postprocessor/executable. The dict can also have "PP+EXE" keys
410 which are used when the given exe is used by the given PP.
411 Use 'default' as the name for arguments to passed to all PP
e409895f 412
413 The following options are used by the extractors:
62bff2c1 414 extractor_retries: Number of times to retry for known errors
415 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 416 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 417 discontinuities such as ad breaks (default: False)
3600fd59 418 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 419 data will be downloaded and processed by extractor.
420 You can reduce network I/O by disabling it if you don't
421 care about DASH. (only for youtube)
e409895f 422 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 423 data will be downloaded and processed by extractor.
424 You can reduce network I/O by disabling it if you don't
425 care about HLS. (only for youtube)
8222d8de
JMF
426 """
427
c9969434
S
428 _NUMERIC_FIELDS = set((
429 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
430 'timestamp', 'upload_year', 'upload_month', 'upload_day',
431 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
432 'average_rating', 'comment_count', 'age_limit',
433 'start_time', 'end_time',
434 'chapter_number', 'season_number', 'episode_number',
435 'track_number', 'disc_number', 'release_year',
436 'playlist_index',
437 ))
438
8222d8de
JMF
439 params = None
440 _ies = []
56d868db 441 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
0202b52a 442 __prepare_filename_warned = False
1cf376f5 443 _first_webpage_request = True
8222d8de
JMF
444 _download_retcode = None
445 _num_downloads = None
30a074c2 446 _playlist_level = 0
447 _playlist_urls = set()
8222d8de
JMF
448 _screen_file = None
449
3511266b 450 def __init__(self, params=None, auto_init=True):
8222d8de 451 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
452 if params is None:
453 params = {}
8222d8de 454 self._ies = []
56c73665 455 self._ies_instances = {}
56d868db 456 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
0202b52a 457 self.__prepare_filename_warned = False
1cf376f5 458 self._first_webpage_request = True
ab8e5e51 459 self._post_hooks = []
933605d7 460 self._progress_hooks = []
8222d8de
JMF
461 self._download_retcode = 0
462 self._num_downloads = 0
463 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 464 self._err_file = sys.stderr
4abf617b
S
465 self.params = {
466 # Default parameters
467 'nocheckcertificate': False,
468 }
469 self.params.update(params)
a0e07d31 470 self.cache = Cache(self)
34308b30 471
a61f4b28 472 if sys.version_info < (3, 6):
473 self.report_warning(
474 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
475 'Update to Python 3.6 or above' % sys.version_info[:2])
476
be5df5ee
S
477 def check_deprecated(param, option, suggestion):
478 if self.params.get(param) is not None:
53ed7066 479 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee
S
480 return True
481 return False
482
483 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
484 if self.params.get('geo_verification_proxy') is None:
485 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
486
0d1bb027 487 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
488 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 489 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 490
491 for msg in self.params.get('warnings', []):
492 self.report_warning(msg)
493
6b591b29 494 if self.params.get('final_ext'):
495 if self.params.get('merge_output_format'):
496 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
497 self.params['merge_output_format'] = self.params['final_ext']
498
b9d973be 499 if 'overwrites' in self.params and self.params['overwrites'] is None:
500 del self.params['overwrites']
501
0783b09b 502 if params.get('bidi_workaround', False):
1c088fa8
PH
503 try:
504 import pty
505 master, slave = pty.openpty()
003c69a8 506 width = compat_get_terminal_size().columns
1c088fa8
PH
507 if width is None:
508 width_args = []
509 else:
510 width_args = ['-w', str(width)]
5d681e96 511 sp_kwargs = dict(
1c088fa8
PH
512 stdin=subprocess.PIPE,
513 stdout=slave,
514 stderr=self._err_file)
5d681e96
PH
515 try:
516 self._output_process = subprocess.Popen(
517 ['bidiv'] + width_args, **sp_kwargs
518 )
519 except OSError:
5d681e96
PH
520 self._output_process = subprocess.Popen(
521 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
522 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 523 except OSError as ose:
66e7ace1 524 if ose.errno == errno.ENOENT:
6febd1c1 525 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
526 else:
527 raise
0783b09b 528
3089bc74
S
529 if (sys.platform != 'win32'
530 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
531 and not params.get('restrictfilenames', False)):
e9137224 532 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 533 self.report_warning(
6febd1c1 534 'Assuming --restrict-filenames since file system encoding '
1b725173 535 'cannot encode all characters. '
6febd1c1 536 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 537 self.params['restrictfilenames'] = True
34308b30 538
de6000d9 539 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 540
dca08720
PH
541 self._setup_opener()
542
4cd0a709 543 """Preload the archive, if any is specified"""
544 def preload_download_archive(fn):
545 if fn is None:
546 return False
0760b0a7 547 self.write_debug('Loading archive file %r\n' % fn)
4cd0a709 548 try:
549 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
550 for line in archive_file:
551 self.archive.add(line.strip())
552 except IOError as ioe:
553 if ioe.errno != errno.ENOENT:
554 raise
555 return False
556 return True
557
558 self.archive = set()
559 preload_download_archive(self.params.get('download_archive'))
560
3511266b
PH
561 if auto_init:
562 self.print_debug_header()
563 self.add_default_info_extractors()
564
4f026faf
PH
565 for pp_def_raw in self.params.get('postprocessors', []):
566 pp_class = get_postprocessor(pp_def_raw['key'])
567 pp_def = dict(pp_def_raw)
568 del pp_def['key']
5bfa4862 569 if 'when' in pp_def:
570 when = pp_def['when']
571 del pp_def['when']
572 else:
56d868db 573 when = 'post_process'
4f026faf 574 pp = pp_class(self, **compat_kwargs(pp_def))
5bfa4862 575 self.add_post_processor(pp, when=when)
4f026faf 576
ab8e5e51
AM
577 for ph in self.params.get('post_hooks', []):
578 self.add_post_hook(ph)
579
71b640cc
PH
580 for ph in self.params.get('progress_hooks', []):
581 self.add_progress_hook(ph)
582
51fb4995
YCH
583 register_socks_protocols()
584
7d4111ed
PH
585 def warn_if_short_id(self, argv):
586 # short YouTube ID starting with dash?
587 idxs = [
588 i for i, a in enumerate(argv)
589 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
590 if idxs:
591 correct_argv = (
7a5c1cfe 592 ['yt-dlp']
3089bc74
S
593 + [a for i, a in enumerate(argv) if i not in idxs]
594 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
595 )
596 self.report_warning(
597 'Long argument string detected. '
598 'Use -- to separate parameters and URLs, like this:\n%s\n' %
599 args_to_str(correct_argv))
600
8222d8de
JMF
601 def add_info_extractor(self, ie):
602 """Add an InfoExtractor object to the end of the list."""
603 self._ies.append(ie)
e52d7f85
JMF
604 if not isinstance(ie, type):
605 self._ies_instances[ie.ie_key()] = ie
606 ie.set_downloader(self)
8222d8de 607
56c73665
JMF
608 def get_info_extractor(self, ie_key):
609 """
610 Get an instance of an IE with name ie_key, it will try to get one from
611 the _ies list, if there's no instance it will create a new one and add
612 it to the extractor list.
613 """
614 ie = self._ies_instances.get(ie_key)
615 if ie is None:
616 ie = get_info_extractor(ie_key)()
617 self.add_info_extractor(ie)
618 return ie
619
023fa8c4
JMF
620 def add_default_info_extractors(self):
621 """
622 Add the InfoExtractors returned by gen_extractors to the end of the list
623 """
e52d7f85 624 for ie in gen_extractor_classes():
023fa8c4
JMF
625 self.add_info_extractor(ie)
626
56d868db 627 def add_post_processor(self, pp, when='post_process'):
8222d8de 628 """Add a PostProcessor object to the end of the chain."""
5bfa4862 629 self._pps[when].append(pp)
8222d8de
JMF
630 pp.set_downloader(self)
631
ab8e5e51
AM
632 def add_post_hook(self, ph):
633 """Add the post hook"""
634 self._post_hooks.append(ph)
635
933605d7
JMF
636 def add_progress_hook(self, ph):
637 """Add the progress hook (currently only for the file downloader)"""
638 self._progress_hooks.append(ph)
8ab470f1 639
1c088fa8 640 def _bidi_workaround(self, message):
5d681e96 641 if not hasattr(self, '_output_channel'):
1c088fa8
PH
642 return message
643
5d681e96 644 assert hasattr(self, '_output_process')
11b85ce6 645 assert isinstance(message, compat_str)
6febd1c1
PH
646 line_count = message.count('\n') + 1
647 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 648 self._output_process.stdin.flush()
6febd1c1 649 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 650 for _ in range(line_count))
6febd1c1 651 return res[:-len('\n')]
1c088fa8 652
734f90bb 653 def _write_string(self, s, out=None):
b58ddb32 654 write_string(s, out=out, encoding=self.params.get('encoding'))
734f90bb 655
848887eb 656 def to_stdout(self, message, skip_eol=False, quiet=False):
0760b0a7 657 """Print message to stdout"""
8bf9319e 658 if self.params.get('logger'):
43afe285 659 self.params['logger'].debug(message)
848887eb 660 elif not quiet:
1c088fa8 661 message = self._bidi_workaround(message)
6febd1c1 662 terminator = ['\n', ''][skip_eol]
8222d8de 663 output = message + terminator
1c088fa8 664
734f90bb 665 self._write_string(output, self._screen_file)
8222d8de
JMF
666
667 def to_stderr(self, message):
0760b0a7 668 """Print message to stderr"""
11b85ce6 669 assert isinstance(message, compat_str)
8bf9319e 670 if self.params.get('logger'):
43afe285
IB
671 self.params['logger'].error(message)
672 else:
1c088fa8 673 message = self._bidi_workaround(message)
6febd1c1 674 output = message + '\n'
734f90bb 675 self._write_string(output, self._err_file)
8222d8de 676
1e5b9a95
PH
677 def to_console_title(self, message):
678 if not self.params.get('consoletitle', False):
679 return
4bede0d8
C
680 if compat_os_name == 'nt':
681 if ctypes.windll.kernel32.GetConsoleWindow():
682 # c_wchar_p() might not be necessary if `message` is
683 # already of type unicode()
684 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 685 elif 'TERM' in os.environ:
b46696bd 686 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 687
bdde425c
PH
688 def save_console_title(self):
689 if not self.params.get('consoletitle', False):
690 return
94c3442e
S
691 if self.params.get('simulate', False):
692 return
4bede0d8 693 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 694 # Save the title on stack
734f90bb 695 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
696
697 def restore_console_title(self):
698 if not self.params.get('consoletitle', False):
699 return
94c3442e
S
700 if self.params.get('simulate', False):
701 return
4bede0d8 702 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 703 # Restore the title from stack
734f90bb 704 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
705
706 def __enter__(self):
707 self.save_console_title()
708 return self
709
710 def __exit__(self, *args):
711 self.restore_console_title()
f89197d7 712
dca08720 713 if self.params.get('cookiefile') is not None:
1bab3437 714 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 715
8222d8de
JMF
716 def trouble(self, message=None, tb=None):
717 """Determine action to take when a download problem appears.
718
719 Depending on if the downloader has been configured to ignore
720 download errors or not, this method may throw an exception or
721 not when errors are found, after printing the message.
722
723 tb, if given, is additional traceback information.
724 """
725 if message is not None:
726 self.to_stderr(message)
727 if self.params.get('verbose'):
728 if tb is None:
729 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 730 tb = ''
8222d8de 731 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 732 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 733 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
734 else:
735 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 736 tb = ''.join(tb_data)
c19bc311 737 if tb:
738 self.to_stderr(tb)
8222d8de
JMF
739 if not self.params.get('ignoreerrors', False):
740 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
741 exc_info = sys.exc_info()[1].exc_info
742 else:
743 exc_info = sys.exc_info()
744 raise DownloadError(message, exc_info)
745 self._download_retcode = 1
746
0760b0a7 747 def to_screen(self, message, skip_eol=False):
748 """Print message to stdout if not in quiet mode"""
749 self.to_stdout(
750 message, skip_eol, quiet=self.params.get('quiet', False))
751
8222d8de
JMF
752 def report_warning(self, message):
753 '''
754 Print the message to stderr, it will be prefixed with 'WARNING:'
755 If stderr is a tty file the 'WARNING:' will be colored
756 '''
6d07ce01
JMF
757 if self.params.get('logger') is not None:
758 self.params['logger'].warning(message)
8222d8de 759 else:
ad8915b7
PH
760 if self.params.get('no_warnings'):
761 return
e9c0cdd3 762 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6d07ce01
JMF
763 _msg_header = '\033[0;33mWARNING:\033[0m'
764 else:
765 _msg_header = 'WARNING:'
766 warning_message = '%s %s' % (_msg_header, message)
767 self.to_stderr(warning_message)
8222d8de
JMF
768
769 def report_error(self, message, tb=None):
770 '''
771 Do the same as trouble, but prefixes the message with 'ERROR:', colored
772 in red if stderr is a tty file.
773 '''
e9c0cdd3 774 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6febd1c1 775 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 776 else:
6febd1c1
PH
777 _msg_header = 'ERROR:'
778 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
779 self.trouble(error_message, tb)
780
0760b0a7 781 def write_debug(self, message):
782 '''Log debug message or Print message to stderr'''
783 if not self.params.get('verbose', False):
784 return
785 message = '[debug] %s' % message
786 if self.params.get('logger'):
787 self.params['logger'].debug(message)
788 else:
789 self._write_string('%s\n' % message)
790
8222d8de
JMF
791 def report_file_already_downloaded(self, file_name):
792 """Report file has already been fully downloaded."""
793 try:
6febd1c1 794 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 795 except UnicodeEncodeError:
6febd1c1 796 self.to_screen('[download] The file has already been downloaded')
8222d8de 797
0c3d0f51 798 def report_file_delete(self, file_name):
799 """Report that existing file will be deleted."""
800 try:
c25228e5 801 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 802 except UnicodeEncodeError:
c25228e5 803 self.to_screen('Deleting existing file')
0c3d0f51 804
de6000d9 805 def parse_outtmpl(self):
806 outtmpl_dict = self.params.get('outtmpl', {})
807 if not isinstance(outtmpl_dict, dict):
808 outtmpl_dict = {'default': outtmpl_dict}
809 outtmpl_dict.update({
810 k: v for k, v in DEFAULT_OUTTMPL.items()
811 if not outtmpl_dict.get(k)})
812 for key, val in outtmpl_dict.items():
813 if isinstance(val, bytes):
814 self.report_warning(
815 'Parameter outtmpl is bytes, but should be a unicode string. '
816 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
817 return outtmpl_dict
818
143db31d 819 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
820 """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
821 template_dict = dict(info_dict)
a439a3a4 822 na = self.params.get('outtmpl_na_placeholder', 'NA')
143db31d 823
824 # duration_string
825 template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 826 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 827 if info_dict.get('duration', None) is not None
828 else None)
829
830 # epoch
831 template_dict['epoch'] = int(time.time())
832
833 # autonumber
834 autonumber_size = self.params.get('autonumber_size')
835 if autonumber_size is None:
836 autonumber_size = 5
837 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
838
839 # resolution if not defined
840 if template_dict.get('resolution') is None:
841 if template_dict.get('width') and template_dict.get('height'):
842 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
843 elif template_dict.get('height'):
844 template_dict['resolution'] = '%sp' % template_dict['height']
845 elif template_dict.get('width'):
846 template_dict['resolution'] = '%dx?' % template_dict['width']
847
143db31d 848 # For fields playlist_index and autonumber convert all occurrences
849 # of %(field)s to %(field)0Nd for backward compatibility
850 field_size_compat_map = {
f59ae581 851 'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
143db31d 852 'autonumber': autonumber_size,
853 }
854 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
855 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
856 if mobj:
857 outtmpl = re.sub(
858 FIELD_SIZE_COMPAT_RE,
859 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
860 outtmpl)
861
862 numeric_fields = list(self._NUMERIC_FIELDS)
a439a3a4 863 if sanitize is None:
864 sanitize = lambda k, v: v
143db31d 865
e625be0d 866 EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
867 # Field is of the form key1.key2...
868 # where keys (except first) can be string, int or slice
869 FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
870 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
871 (?P<negate>-)?
872 (?P<fields>{0})
873 (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
874 (?:>(?P<strf_format>.+?))?
875 (?:\|(?P<default>.*?))?
876 $'''.format(FIELD_RE))
877 MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
878 MATH_FUNCTIONS = {
879 '+': float.__add__,
880 '-': float.__sub__,
881 }
882 for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
883 final_key = outer_mobj.group('key')
884 str_type = outer_mobj.group('type')
885 value = None
886 mobj = re.match(INTERNAL_FORMAT_RE, final_key)
887 if mobj is not None:
888 mobj = mobj.groupdict()
889 # Object traversal
890 fields = mobj['fields'].split('.')
891 value = traverse_dict(template_dict, fields)
892 # Negative
893 if mobj['negate']:
894 value = float_or_none(value)
895 if value is not None:
896 value *= -1
897 # Do maths
898 if mobj['maths']:
899 value = float_or_none(value)
900 operator = None
901 for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
902 if item == '':
903 value = None
904 if value is None:
905 break
906 if operator:
907 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
908 offset = float_or_none(item)
909 if offset is None:
910 offset = float_or_none(traverse_dict(template_dict, item.split('.')))
911 try:
912 value = operator(value, multiplier * offset)
913 except (TypeError, ZeroDivisionError):
914 value = None
915 operator = None
916 else:
917 operator = MATH_FUNCTIONS[item]
918 # Datetime formatting
919 if mobj['strf_format']:
920 value = strftime_or_none(value, mobj['strf_format'])
921 # Set default
922 if value is None and mobj['default'] is not None:
923 value = mobj['default']
924 # Sanitize
925 if str_type in 'crs' and value is not None: # string
926 value = sanitize('%{}'.format(str_type) % fields[-1], value)
a439a3a4 927 else: # numeric
928 numeric_fields.append(final_key)
929 value = float_or_none(value)
143db31d 930 if value is not None:
a439a3a4 931 template_dict[final_key] = value
143db31d 932
933 # Missing numeric fields used together with integer presentation types
934 # in format specification will break the argument substitution since
935 # string NA placeholder is returned for missing fields. We will patch
936 # output template for missing fields to meet string presentation type.
937 for numeric_field in numeric_fields:
a439a3a4 938 if template_dict.get(numeric_field) is None:
143db31d 939 outtmpl = re.sub(
940 FORMAT_RE.format(re.escape(numeric_field)),
941 r'%({0})s'.format(numeric_field), outtmpl)
942
a439a3a4 943 template_dict = collections.defaultdict(lambda: na, (
944 (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
945 for k, v in template_dict.items() if v is not None))
143db31d 946 return outtmpl, template_dict
947
de6000d9 948 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 949 try:
586a91b6 950 sanitize = lambda k, v: sanitize_filename(
45598aab 951 compat_str(v),
1bb5c511 952 restricted=self.params.get('restrictfilenames'),
40df485f 953 is_id=(k == 'id' or k.endswith('_id')))
de6000d9 954 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
143db31d 955 outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
d0d9ade4 956
15da37c7
S
957 # expand_path translates '%%' into '%' and '$$' into '$'
958 # correspondingly that is not what we want since we need to keep
959 # '%%' intact for template dict substitution step. Working around
960 # with boundary-alike separator hack.
961ea474 961 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
15da37c7
S
962 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
963
964 # outtmpl should be expand_path'ed before template dict substitution
965 # because meta fields may contain env variables we don't want to
966 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
967 # title "Hello $PATH", we don't want `$PATH` to be expanded.
968 filename = expand_path(outtmpl).replace(sep, '') % template_dict
969
143db31d 970 force_ext = OUTTMPL_TYPES.get(tmpl_type)
de6000d9 971 if force_ext is not None:
972 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
973
bdc3fd2f
U
974 # https://github.com/blackjack4494/youtube-dlc/issues/85
975 trim_file_name = self.params.get('trim_file_name', False)
976 if trim_file_name:
977 fn_groups = filename.rsplit('.')
978 ext = fn_groups[-1]
979 sub_ext = ''
980 if len(fn_groups) > 2:
981 sub_ext = fn_groups[-2]
982 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
983
0202b52a 984 return filename
8222d8de 985 except ValueError as err:
6febd1c1 986 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
987 return None
988
de6000d9 989 def prepare_filename(self, info_dict, dir_type='', warn=False):
990 """Generate the output filename."""
0202b52a 991 paths = self.params.get('paths', {})
992 assert isinstance(paths, dict)
de6000d9 993 filename = self._prepare_filename(info_dict, dir_type or 'default')
994
995 if warn and not self.__prepare_filename_warned:
996 if not paths:
997 pass
998 elif filename == '-':
999 self.report_warning('--paths is ignored when an outputting to stdout')
1000 elif os.path.isabs(filename):
1001 self.report_warning('--paths is ignored since an absolute path is given in output template')
1002 self.__prepare_filename_warned = True
1003 if filename == '-' or not filename:
1004 return filename
1005
0202b52a 1006 homepath = expand_path(paths.get('home', '').strip())
1007 assert isinstance(homepath, compat_str)
1008 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1009 assert isinstance(subdir, compat_str)
c2934512 1010 path = os.path.join(homepath, subdir, filename)
1011
1012 # Temporary fix for #4787
1013 # 'Treat' all problem characters by passing filename through preferredencoding
1014 # to workaround encoding issues with subprocess on python2 @ Windows
1015 if sys.version_info < (3, 0) and sys.platform == 'win32':
1016 path = encodeFilename(path, True).decode(preferredencoding())
1017 return sanitize_path(path, force=self.params.get('windowsfilenames'))
0202b52a 1018
c77495e3 1019 def _match_entry(self, info_dict, incomplete=False):
ecdec191 1020 """ Returns None if the file should be downloaded """
8222d8de 1021
c77495e3 1022 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1023
8b0d7497 1024 def check_filter():
8b0d7497 1025 if 'title' in info_dict:
1026 # This can happen when we're just evaluating the playlist
1027 title = info_dict['title']
1028 matchtitle = self.params.get('matchtitle', False)
1029 if matchtitle:
1030 if not re.search(matchtitle, title, re.IGNORECASE):
1031 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1032 rejecttitle = self.params.get('rejecttitle', False)
1033 if rejecttitle:
1034 if re.search(rejecttitle, title, re.IGNORECASE):
1035 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1036 date = info_dict.get('upload_date')
1037 if date is not None:
1038 dateRange = self.params.get('daterange', DateRange())
1039 if date not in dateRange:
1040 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1041 view_count = info_dict.get('view_count')
1042 if view_count is not None:
1043 min_views = self.params.get('min_views')
1044 if min_views is not None and view_count < min_views:
1045 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1046 max_views = self.params.get('max_views')
1047 if max_views is not None and view_count > max_views:
1048 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1049 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1050 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1051
1052 if not incomplete:
1053 match_filter = self.params.get('match_filter')
1054 if match_filter is not None:
1055 ret = match_filter(info_dict)
1056 if ret is not None:
1057 return ret
1058 return None
1059
c77495e3 1060 if self.in_download_archive(info_dict):
1061 reason = '%s has already been recorded in the archive' % video_title
1062 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1063 else:
1064 reason = check_filter()
1065 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1066 if reason is not None:
1067 self.to_screen('[download] ' + reason)
c77495e3 1068 if self.params.get(break_opt, False):
1069 raise break_err()
8b0d7497 1070 return reason
fe7e0c98 1071
b6c45014
JMF
1072 @staticmethod
1073 def add_extra_info(info_dict, extra_info):
1074 '''Set the keys from extra_info in info dict if they are missing'''
1075 for key, value in extra_info.items():
1076 info_dict.setdefault(key, value)
1077
58f197b7 1078 def extract_info(self, url, download=True, ie_key=None, extra_info={},
61aa5ba3 1079 process=True, force_generic_extractor=False):
41d1cca3 1080 """
1081 Return a list with a dictionary for each video extracted.
1082
1083 Arguments:
1084 url -- URL to extract
1085
1086 Keyword arguments:
1087 download -- whether to download videos during extraction
1088 ie_key -- extractor key hint
1089 extra_info -- dictionary containing the extra values to add to each result
1090 process -- whether to resolve all unresolved references (URLs, playlist items),
1091 must be True for download to work.
1092 force_generic_extractor -- force using the generic extractor
1093 """
fe7e0c98 1094
61aa5ba3 1095 if not ie_key and force_generic_extractor:
d22dec74
S
1096 ie_key = 'Generic'
1097
8222d8de 1098 if ie_key:
56c73665 1099 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
1100 else:
1101 ies = self._ies
1102
1103 for ie in ies:
1104 if not ie.suitable(url):
1105 continue
1106
9a68de12 1107 ie_key = ie.ie_key()
1108 ie = self.get_info_extractor(ie_key)
8222d8de 1109 if not ie.working():
6febd1c1
PH
1110 self.report_warning('The program functionality for this site has been marked as broken, '
1111 'and will probably not work.')
8222d8de
JMF
1112
1113 try:
d0757229 1114 temp_id = str_or_none(
63be1aab 1115 ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1116 else ie._match_id(url))
a0566bbf 1117 except (AssertionError, IndexError, AttributeError):
1118 temp_id = None
1119 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1120 self.to_screen("[%s] %s: has already been recorded in archive" % (
1121 ie_key, temp_id))
1122 break
58f197b7 1123 return self.__extract_info(url, ie, download, extra_info, process)
a0566bbf 1124 else:
1125 self.report_error('no suitable InfoExtractor for URL %s' % url)
1126
1127 def __handle_extraction_exceptions(func):
1128 def wrapper(self, *args, **kwargs):
1129 try:
1130 return func(self, *args, **kwargs)
773f291d
S
1131 except GeoRestrictedError as e:
1132 msg = e.msg
1133 if e.countries:
1134 msg += '\nThis video is available in %s.' % ', '.join(
1135 map(ISO3166Utils.short2full, e.countries))
1136 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1137 self.report_error(msg)
fb043a6e 1138 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1139 self.report_error(compat_str(e), e.format_traceback())
8b0d7497 1140 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
d3e5bbf4 1141 raise
8222d8de
JMF
1142 except Exception as e:
1143 if self.params.get('ignoreerrors', False):
9b9c5355 1144 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1145 else:
1146 raise
a0566bbf 1147 return wrapper
1148
1149 @__handle_extraction_exceptions
58f197b7 1150 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1151 ie_result = ie.extract(url)
1152 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1153 return
1154 if isinstance(ie_result, list):
1155 # Backwards compatibility: old IE result format
1156 ie_result = {
1157 '_type': 'compat_list',
1158 'entries': ie_result,
1159 }
a0566bbf 1160 self.add_default_extra_info(ie_result, ie, url)
1161 if process:
1162 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1163 else:
a0566bbf 1164 return ie_result
fe7e0c98 1165
ea38e55f
PH
1166 def add_default_extra_info(self, ie_result, ie, url):
1167 self.add_extra_info(ie_result, {
1168 'extractor': ie.IE_NAME,
1169 'webpage_url': url,
1170 'webpage_url_basename': url_basename(url),
1171 'extractor_key': ie.ie_key(),
1172 })
1173
8222d8de
JMF
1174 def process_ie_result(self, ie_result, download=True, extra_info={}):
1175 """
1176 Take the result of the ie(may be modified) and resolve all unresolved
1177 references (URLs, playlist items).
1178
1179 It will also download the videos if 'download'.
1180 Returns the resolved ie_result.
1181 """
e8ee972c
PH
1182 result_type = ie_result.get('_type', 'video')
1183
057a5206 1184 if result_type in ('url', 'url_transparent'):
134c6ea8 1185 ie_result['url'] = sanitize_url(ie_result['url'])
057a5206 1186 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1187 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1188 or extract_flat is True):
de6000d9 1189 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
e8ee972c
PH
1190 return ie_result
1191
8222d8de 1192 if result_type == 'video':
b6c45014 1193 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1194 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1195 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1196 if additional_urls:
1197 # TODO: Improve MetadataFromFieldPP to allow setting a list
1198 if isinstance(additional_urls, compat_str):
1199 additional_urls = [additional_urls]
1200 self.to_screen(
1201 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1202 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1203 ie_result['additional_entries'] = [
1204 self.extract_info(
1205 url, download, extra_info,
1206 force_generic_extractor=self.params.get('force_generic_extractor'))
1207 for url in additional_urls
1208 ]
1209 return ie_result
8222d8de
JMF
1210 elif result_type == 'url':
1211 # We have to add extra_info to the results because it may be
1212 # contained in a playlist
07cce701 1213 return self.extract_info(
1214 ie_result['url'], download,
1215 ie_key=ie_result.get('ie_key'),
1216 extra_info=extra_info)
7fc3fa05
PH
1217 elif result_type == 'url_transparent':
1218 # Use the information from the embedding page
1219 info = self.extract_info(
1220 ie_result['url'], ie_key=ie_result.get('ie_key'),
1221 extra_info=extra_info, download=False, process=False)
1222
1640eb09
S
1223 # extract_info may return None when ignoreerrors is enabled and
1224 # extraction failed with an error, don't crash and return early
1225 # in this case
1226 if not info:
1227 return info
1228
412c617d
PH
1229 force_properties = dict(
1230 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1231 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1232 if f in force_properties:
1233 del force_properties[f]
1234 new_result = info.copy()
1235 new_result.update(force_properties)
7fc3fa05 1236
0563f7ac
S
1237 # Extracted info may not be a video result (i.e.
1238 # info.get('_type', 'video') != video) but rather an url or
1239 # url_transparent. In such cases outer metadata (from ie_result)
1240 # should be propagated to inner one (info). For this to happen
1241 # _type of info should be overridden with url_transparent. This
067aa17e 1242 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1243 if new_result.get('_type') == 'url':
1244 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1245
1246 return self.process_ie_result(
1247 new_result, download=download, extra_info=extra_info)
40fcba5e 1248 elif result_type in ('playlist', 'multi_video'):
30a074c2 1249 # Protect from infinite recursion due to recursively nested playlists
1250 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1251 webpage_url = ie_result['webpage_url']
1252 if webpage_url in self._playlist_urls:
7e85e872 1253 self.to_screen(
30a074c2 1254 '[download] Skipping already downloaded playlist: %s'
1255 % ie_result.get('title') or ie_result.get('id'))
1256 return
7e85e872 1257
30a074c2 1258 self._playlist_level += 1
1259 self._playlist_urls.add(webpage_url)
bc516a3f 1260 self._sanitize_thumbnails(ie_result)
30a074c2 1261 try:
1262 return self.__process_playlist(ie_result, download)
1263 finally:
1264 self._playlist_level -= 1
1265 if not self._playlist_level:
1266 self._playlist_urls.clear()
8222d8de 1267 elif result_type == 'compat_list':
c9bf4114
PH
1268 self.report_warning(
1269 'Extractor %s returned a compat_list result. '
1270 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1271
8222d8de 1272 def _fixup(r):
9e1a5b84
JW
1273 self.add_extra_info(
1274 r,
9103bbc5
JMF
1275 {
1276 'extractor': ie_result['extractor'],
1277 'webpage_url': ie_result['webpage_url'],
29eb5174 1278 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 1279 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
1280 }
1281 )
8222d8de
JMF
1282 return r
1283 ie_result['entries'] = [
b6c45014 1284 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1285 for r in ie_result['entries']
1286 ]
1287 return ie_result
1288 else:
1289 raise Exception('Invalid result type: %s' % result_type)
1290
e92caff5 1291 def _ensure_dir_exists(self, path):
1292 return make_dir(path, self.report_error)
1293
30a074c2 1294 def __process_playlist(self, ie_result, download):
1295 # We process each entry in the playlist
1296 playlist = ie_result.get('title') or ie_result.get('id')
1297 self.to_screen('[download] Downloading playlist: %s' % playlist)
1298
498f5606 1299 if 'entries' not in ie_result:
1300 raise EntryNotInPlaylist()
1301 incomplete_entries = bool(ie_result.get('requested_entries'))
1302 if incomplete_entries:
1303 def fill_missing_entries(entries, indexes):
1304 ret = [None] * max(*indexes)
1305 for i, entry in zip(indexes, entries):
1306 ret[i - 1] = entry
1307 return ret
1308 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1309
30a074c2 1310 playlist_results = []
1311
1312 playliststart = self.params.get('playliststart', 1) - 1
1313 playlistend = self.params.get('playlistend')
1314 # For backwards compatibility, interpret -1 as whole list
1315 if playlistend == -1:
1316 playlistend = None
1317
1318 playlistitems_str = self.params.get('playlist_items')
1319 playlistitems = None
1320 if playlistitems_str is not None:
1321 def iter_playlistitems(format):
1322 for string_segment in format.split(','):
1323 if '-' in string_segment:
1324 start, end = string_segment.split('-')
1325 for item in range(int(start), int(end) + 1):
1326 yield int(item)
1327 else:
1328 yield int(string_segment)
1329 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1330
1331 ie_entries = ie_result['entries']
1332
1333 def make_playlistitems_entries(list_ie_entries):
1334 num_entries = len(list_ie_entries)
498f5606 1335 for i in playlistitems:
1336 if -num_entries < i <= num_entries:
1337 yield list_ie_entries[i - 1]
1338 elif incomplete_entries:
1339 raise EntryNotInPlaylist()
30a074c2 1340
1341 if isinstance(ie_entries, list):
1342 n_all_entries = len(ie_entries)
1343 if playlistitems:
498f5606 1344 entries = list(make_playlistitems_entries(ie_entries))
30a074c2 1345 else:
1346 entries = ie_entries[playliststart:playlistend]
1347 n_entries = len(entries)
498f5606 1348 msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
30a074c2 1349 elif isinstance(ie_entries, PagedList):
1350 if playlistitems:
1351 entries = []
1352 for item in playlistitems:
1353 entries.extend(ie_entries.getslice(
1354 item - 1, item
1355 ))
1356 else:
1357 entries = ie_entries.getslice(
1358 playliststart, playlistend)
1359 n_entries = len(entries)
498f5606 1360 msg = 'Downloading %d videos' % n_entries
30a074c2 1361 else: # iterable
1362 if playlistitems:
498f5606 1363 entries = list(make_playlistitems_entries(list(itertools.islice(
1364 ie_entries, 0, max(playlistitems)))))
30a074c2 1365 else:
1366 entries = list(itertools.islice(
1367 ie_entries, playliststart, playlistend))
1368 n_entries = len(entries)
498f5606 1369 msg = 'Downloading %d videos' % n_entries
1370
1371 if any((entry is None for entry in entries)):
1372 raise EntryNotInPlaylist()
1373 if not playlistitems and (playliststart or playlistend):
1374 playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1375 ie_result['entries'] = entries
1376 ie_result['requested_entries'] = playlistitems
1377
1378 if self.params.get('allow_playlist_files', True):
1379 ie_copy = {
1380 'playlist': playlist,
1381 'playlist_id': ie_result.get('id'),
1382 'playlist_title': ie_result.get('title'),
1383 'playlist_uploader': ie_result.get('uploader'),
1384 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1385 'playlist_index': 0,
498f5606 1386 }
1387 ie_copy.update(dict(ie_result))
1388
1389 if self.params.get('writeinfojson', False):
1390 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1391 if not self._ensure_dir_exists(encodeFilename(infofn)):
1392 return
1393 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1394 self.to_screen('[info] Playlist metadata is already present')
1395 else:
1396 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1397 try:
1398 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1399 except (OSError, IOError):
1400 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1401
681de68e 1402 # TODO: This should be passed to ThumbnailsConvertor if necessary
1403 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1404
498f5606 1405 if self.params.get('writedescription', False):
1406 descfn = self.prepare_filename(ie_copy, 'pl_description')
1407 if not self._ensure_dir_exists(encodeFilename(descfn)):
1408 return
1409 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1410 self.to_screen('[info] Playlist description is already present')
1411 elif ie_result.get('description') is None:
1412 self.report_warning('There\'s no playlist description to write.')
1413 else:
1414 try:
1415 self.to_screen('[info] Writing playlist description to: ' + descfn)
1416 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1417 descfile.write(ie_result['description'])
1418 except (OSError, IOError):
1419 self.report_error('Cannot write playlist description file ' + descfn)
1420 return
30a074c2 1421
71729754 1422 # Save playlist_index before re-ordering
1423 entries = [
1424 ((playlistitems[i - 1] if playlistitems else i), entry)
1425 for i, entry in enumerate(entries, 1)]
1426
30a074c2 1427 if self.params.get('playlistreverse', False):
1428 entries = entries[::-1]
30a074c2 1429 if self.params.get('playlistrandom', False):
1430 random.shuffle(entries)
1431
1432 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1433
498f5606 1434 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
26e2805c 1435 failures = 0
1436 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1437 for i, entry_tuple in enumerate(entries, 1):
1438 playlist_index, entry = entry_tuple
53ed7066 1439 if 'playlist_index' in self.params.get('compat_options', []):
1440 playlist_index = playlistitems[i - 1] if playlistitems else i
30a074c2 1441 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1442 # This __x_forwarded_for_ip thing is a bit ugly but requires
1443 # minimal changes
1444 if x_forwarded_for:
1445 entry['__x_forwarded_for_ip'] = x_forwarded_for
1446 extra = {
1447 'n_entries': n_entries,
f59ae581 1448 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1449 'playlist_index': playlist_index,
1450 'playlist_autonumber': i,
30a074c2 1451 'playlist': playlist,
1452 'playlist_id': ie_result.get('id'),
1453 'playlist_title': ie_result.get('title'),
1454 'playlist_uploader': ie_result.get('uploader'),
1455 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1456 'extractor': ie_result['extractor'],
1457 'webpage_url': ie_result['webpage_url'],
1458 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1459 'extractor_key': ie_result['extractor_key'],
1460 }
1461
1462 if self._match_entry(entry, incomplete=True) is not None:
1463 continue
1464
1465 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1466 if not entry_result:
1467 failures += 1
1468 if failures >= max_failures:
1469 self.report_error(
1470 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1471 break
30a074c2 1472 # TODO: skip failed (empty) entries?
1473 playlist_results.append(entry_result)
1474 ie_result['entries'] = playlist_results
1475 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1476 return ie_result
1477
a0566bbf 1478 @__handle_extraction_exceptions
1479 def __process_iterable_entry(self, entry, download, extra_info):
1480 return self.process_ie_result(
1481 entry, download=download, extra_info=extra_info)
1482
67134eab
JMF
1483 def _build_format_filter(self, filter_spec):
1484 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1485
1486 OPERATORS = {
1487 '<': operator.lt,
1488 '<=': operator.le,
1489 '>': operator.gt,
1490 '>=': operator.ge,
1491 '=': operator.eq,
1492 '!=': operator.ne,
1493 }
67134eab 1494 operator_rex = re.compile(r'''(?x)\s*
a03a3c80 1495 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
083c9df9
PH
1496 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1497 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1498 $
083c9df9 1499 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1500 m = operator_rex.search(filter_spec)
9ddb6925
S
1501 if m:
1502 try:
1503 comparison_value = int(m.group('value'))
1504 except ValueError:
1505 comparison_value = parse_filesize(m.group('value'))
1506 if comparison_value is None:
1507 comparison_value = parse_filesize(m.group('value') + 'B')
1508 if comparison_value is None:
1509 raise ValueError(
1510 'Invalid value %r in format specification %r' % (
67134eab 1511 m.group('value'), filter_spec))
9ddb6925
S
1512 op = OPERATORS[m.group('op')]
1513
083c9df9 1514 if not m:
9ddb6925
S
1515 STR_OPERATORS = {
1516 '=': operator.eq,
10d33b34
YCH
1517 '^=': lambda attr, value: attr.startswith(value),
1518 '$=': lambda attr, value: attr.endswith(value),
1519 '*=': lambda attr, value: value in attr,
9ddb6925 1520 }
67134eab 1521 str_operator_rex = re.compile(r'''(?x)
f96bff99 1522 \s*(?P<key>[a-zA-Z0-9._-]+)
2cc779f4 1523 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1524 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1525 \s*$
9ddb6925 1526 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1527 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1528 if m:
1529 comparison_value = m.group('value')
2cc779f4
S
1530 str_op = STR_OPERATORS[m.group('op')]
1531 if m.group('negation'):
e118a879 1532 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1533 else:
1534 op = str_op
083c9df9 1535
9ddb6925 1536 if not m:
67134eab 1537 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1538
1539 def _filter(f):
1540 actual_value = f.get(m.group('key'))
1541 if actual_value is None:
1542 return m.group('none_inclusive')
1543 return op(actual_value, comparison_value)
67134eab
JMF
1544 return _filter
1545
0017d9ad 1546 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1547
af0f7428
S
1548 def can_merge():
1549 merger = FFmpegMergerPP(self)
1550 return merger.available and merger.can_merge()
1551
91ebc640 1552 prefer_best = (
1553 not self.params.get('simulate', False)
1554 and download
1555 and (
1556 not can_merge()
19807826 1557 or info_dict.get('is_live', False)
de6000d9 1558 or self.outtmpl_dict['default'] == '-'))
53ed7066 1559 compat = (
1560 prefer_best
1561 or self.params.get('allow_multiple_audio_streams', False)
1562 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1563
1564 return (
53ed7066 1565 'best/bestvideo+bestaudio' if prefer_best
1566 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1567 else 'bestvideo+bestaudio/best')
0017d9ad 1568
67134eab
JMF
1569 def build_format_selector(self, format_spec):
1570 def syntax_error(note, start):
1571 message = (
1572 'Invalid format specification: '
1573 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1574 return SyntaxError(message)
1575
1576 PICKFIRST = 'PICKFIRST'
1577 MERGE = 'MERGE'
1578 SINGLE = 'SINGLE'
0130afb7 1579 GROUP = 'GROUP'
67134eab
JMF
1580 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1581
91ebc640 1582 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1583 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1584
e8e73840 1585 check_formats = self.params.get('check_formats')
1586
67134eab
JMF
1587 def _parse_filter(tokens):
1588 filter_parts = []
1589 for type, string, start, _, _ in tokens:
1590 if type == tokenize.OP and string == ']':
1591 return ''.join(filter_parts)
1592 else:
1593 filter_parts.append(string)
1594
232541df 1595 def _remove_unused_ops(tokens):
17cc1534 1596 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1597 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1598 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1599 last_string, last_start, last_end, last_line = None, None, None, None
1600 for type, string, start, end, line in tokens:
1601 if type == tokenize.OP and string == '[':
1602 if last_string:
1603 yield tokenize.NAME, last_string, last_start, last_end, last_line
1604 last_string = None
1605 yield type, string, start, end, line
1606 # everything inside brackets will be handled by _parse_filter
1607 for type, string, start, end, line in tokens:
1608 yield type, string, start, end, line
1609 if type == tokenize.OP and string == ']':
1610 break
1611 elif type == tokenize.OP and string in ALLOWED_OPS:
1612 if last_string:
1613 yield tokenize.NAME, last_string, last_start, last_end, last_line
1614 last_string = None
1615 yield type, string, start, end, line
1616 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1617 if not last_string:
1618 last_string = string
1619 last_start = start
1620 last_end = end
1621 else:
1622 last_string += string
1623 if last_string:
1624 yield tokenize.NAME, last_string, last_start, last_end, last_line
1625
cf2ac6df 1626 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1627 selectors = []
1628 current_selector = None
1629 for type, string, start, _, _ in tokens:
1630 # ENCODING is only defined in python 3.x
1631 if type == getattr(tokenize, 'ENCODING', None):
1632 continue
1633 elif type in [tokenize.NAME, tokenize.NUMBER]:
1634 current_selector = FormatSelector(SINGLE, string, [])
1635 elif type == tokenize.OP:
cf2ac6df
JMF
1636 if string == ')':
1637 if not inside_group:
1638 # ')' will be handled by the parentheses group
1639 tokens.restore_last_token()
67134eab 1640 break
cf2ac6df 1641 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1642 tokens.restore_last_token()
1643 break
cf2ac6df
JMF
1644 elif inside_choice and string == ',':
1645 tokens.restore_last_token()
1646 break
1647 elif string == ',':
0a31a350
JMF
1648 if not current_selector:
1649 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1650 selectors.append(current_selector)
1651 current_selector = None
1652 elif string == '/':
d96d604e
JMF
1653 if not current_selector:
1654 raise syntax_error('"/" must follow a format selector', start)
67134eab 1655 first_choice = current_selector
cf2ac6df 1656 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1657 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1658 elif string == '[':
1659 if not current_selector:
1660 current_selector = FormatSelector(SINGLE, 'best', [])
1661 format_filter = _parse_filter(tokens)
1662 current_selector.filters.append(format_filter)
0130afb7
JMF
1663 elif string == '(':
1664 if current_selector:
1665 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1666 group = _parse_format_selection(tokens, inside_group=True)
1667 current_selector = FormatSelector(GROUP, group, [])
67134eab 1668 elif string == '+':
d03cfdce 1669 if not current_selector:
1670 raise syntax_error('Unexpected "+"', start)
1671 selector_1 = current_selector
1672 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1673 if not selector_2:
1674 raise syntax_error('Expected a selector', start)
1675 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1676 else:
1677 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1678 elif type == tokenize.ENDMARKER:
1679 break
1680 if current_selector:
1681 selectors.append(current_selector)
1682 return selectors
1683
f8d4ad9a 1684 def _merge(formats_pair):
1685 format_1, format_2 = formats_pair
1686
1687 formats_info = []
1688 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1689 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1690
1691 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1692 get_no_more = {"video": False, "audio": False}
1693 for (i, fmt_info) in enumerate(formats_info):
1694 for aud_vid in ["audio", "video"]:
1695 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1696 if get_no_more[aud_vid]:
1697 formats_info.pop(i)
1698 get_no_more[aud_vid] = True
1699
1700 if len(formats_info) == 1:
1701 return formats_info[0]
1702
1703 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1704 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1705
1706 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1707 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1708
1709 output_ext = self.params.get('merge_output_format')
1710 if not output_ext:
1711 if the_only_video:
1712 output_ext = the_only_video['ext']
1713 elif the_only_audio and not video_fmts:
1714 output_ext = the_only_audio['ext']
1715 else:
1716 output_ext = 'mkv'
1717
1718 new_dict = {
1719 'requested_formats': formats_info,
1720 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1721 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1722 'ext': output_ext,
1723 }
1724
1725 if the_only_video:
1726 new_dict.update({
1727 'width': the_only_video.get('width'),
1728 'height': the_only_video.get('height'),
1729 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1730 'fps': the_only_video.get('fps'),
1731 'vcodec': the_only_video.get('vcodec'),
1732 'vbr': the_only_video.get('vbr'),
1733 'stretched_ratio': the_only_video.get('stretched_ratio'),
1734 })
1735
1736 if the_only_audio:
1737 new_dict.update({
1738 'acodec': the_only_audio.get('acodec'),
1739 'abr': the_only_audio.get('abr'),
1740 })
1741
1742 return new_dict
1743
e8e73840 1744 def _check_formats(formats):
1745 for f in formats:
1746 self.to_screen('[info] Testing format %s' % f['format_id'])
1747 paths = self.params.get('paths', {})
1748 temp_file = os.path.join(
1749 expand_path(paths.get('home', '').strip()),
1750 expand_path(paths.get('temp', '').strip()),
1751 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
fe346461 1752 try:
1753 dl, _ = self.dl(temp_file, f, test=True)
1754 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1755 dl = False
1756 finally:
1757 if os.path.exists(temp_file):
1758 os.remove(temp_file)
e8e73840 1759 if dl:
1760 yield f
1761 else:
1762 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1763
67134eab 1764 def _build_selector_function(selector):
909d24dd 1765 if isinstance(selector, list): # ,
67134eab
JMF
1766 fs = [_build_selector_function(s) for s in selector]
1767
317f7ab6 1768 def selector_function(ctx):
67134eab 1769 for f in fs:
317f7ab6 1770 for format in f(ctx):
67134eab
JMF
1771 yield format
1772 return selector_function
909d24dd 1773
1774 elif selector.type == GROUP: # ()
0130afb7 1775 selector_function = _build_selector_function(selector.selector)
909d24dd 1776
1777 elif selector.type == PICKFIRST: # /
67134eab
JMF
1778 fs = [_build_selector_function(s) for s in selector.selector]
1779
317f7ab6 1780 def selector_function(ctx):
67134eab 1781 for f in fs:
317f7ab6 1782 picked_formats = list(f(ctx))
67134eab
JMF
1783 if picked_formats:
1784 return picked_formats
1785 return []
67134eab 1786
909d24dd 1787 elif selector.type == SINGLE: # atom
598d185d 1788 format_spec = selector.selector or 'best'
909d24dd 1789
f8d4ad9a 1790 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1791 if format_spec == 'all':
1792 def selector_function(ctx):
1793 formats = list(ctx['formats'])
e8e73840 1794 if check_formats:
1795 formats = _check_formats(formats)
1796 for f in formats:
1797 yield f
f8d4ad9a 1798 elif format_spec == 'mergeall':
1799 def selector_function(ctx):
e8e73840 1800 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1801 if not formats:
1802 return
921b76ca 1803 merged_format = formats[-1]
1804 for f in formats[-2::-1]:
f8d4ad9a 1805 merged_format = _merge((merged_format, f))
1806 yield merged_format
909d24dd 1807
1808 else:
e8e73840 1809 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1810 mobj = re.match(
1811 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1812 format_spec)
1813 if mobj is not None:
1814 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1815 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1816 format_type = (mobj.group('type') or [None])[0]
1817 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1818 format_modified = mobj.group('mod') is not None
909d24dd 1819
1820 format_fallback = not format_type and not format_modified # for b, w
eff63539 1821 filter_f = (
1822 (lambda f: f.get('%scodec' % format_type) != 'none')
1823 if format_type and format_modified # bv*, ba*, wv*, wa*
1824 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1825 if format_type # bv, ba, wv, wa
1826 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1827 if not format_modified # b, w
1828 else None) # b*, w*
67134eab 1829 else:
909d24dd 1830 filter_f = ((lambda f: f.get('ext') == format_spec)
1831 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1832 else (lambda f: f.get('format_id') == format_spec)) # id
1833
1834 def selector_function(ctx):
1835 formats = list(ctx['formats'])
1836 if not formats:
1837 return
1838 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1839 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1840 # for extractors with incomplete formats (audio only (soundcloud)
1841 # or video only (imgur)) best/worst will fallback to
1842 # best/worst {video,audio}-only format
e8e73840 1843 matches = formats
1844 if format_reverse:
1845 matches = matches[::-1]
1846 if check_formats:
1847 matches = list(itertools.islice(_check_formats(matches), format_idx))
1848 n = len(matches)
1849 if -n <= format_idx - 1 < n:
1850 yield matches[format_idx - 1]
909d24dd 1851
1852 elif selector.type == MERGE: # +
d03cfdce 1853 selector_1, selector_2 = map(_build_selector_function, selector.selector)
083c9df9 1854
317f7ab6
S
1855 def selector_function(ctx):
1856 for pair in itertools.product(
d03cfdce 1857 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
67134eab 1858 yield _merge(pair)
083c9df9 1859
67134eab 1860 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1861
317f7ab6
S
1862 def final_selector(ctx):
1863 ctx_copy = copy.deepcopy(ctx)
67134eab 1864 for _filter in filters:
317f7ab6
S
1865 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1866 return selector_function(ctx_copy)
67134eab 1867 return final_selector
083c9df9 1868
67134eab 1869 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1870 try:
232541df 1871 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1872 except tokenize.TokenError:
1873 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1874
1875 class TokenIterator(object):
1876 def __init__(self, tokens):
1877 self.tokens = tokens
1878 self.counter = 0
1879
1880 def __iter__(self):
1881 return self
1882
1883 def __next__(self):
1884 if self.counter >= len(self.tokens):
1885 raise StopIteration()
1886 value = self.tokens[self.counter]
1887 self.counter += 1
1888 return value
1889
1890 next = __next__
1891
1892 def restore_last_token(self):
1893 self.counter -= 1
1894
1895 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1896 return _build_selector_function(parsed_selector)
a9c58ad9 1897
e5660ee6
JMF
1898 def _calc_headers(self, info_dict):
1899 res = std_headers.copy()
1900
1901 add_headers = info_dict.get('http_headers')
1902 if add_headers:
1903 res.update(add_headers)
1904
1905 cookies = self._calc_cookies(info_dict)
1906 if cookies:
1907 res['Cookie'] = cookies
1908
0016b84e
S
1909 if 'X-Forwarded-For' not in res:
1910 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1911 if x_forwarded_for_ip:
1912 res['X-Forwarded-For'] = x_forwarded_for_ip
1913
e5660ee6
JMF
1914 return res
1915
1916 def _calc_cookies(self, info_dict):
5c2266df 1917 pr = sanitized_Request(info_dict['url'])
e5660ee6 1918 self.cookiejar.add_cookie_header(pr)
662435f7 1919 return pr.get_header('Cookie')
e5660ee6 1920
bc516a3f 1921 @staticmethod
1922 def _sanitize_thumbnails(info_dict):
1923 thumbnails = info_dict.get('thumbnails')
1924 if thumbnails is None:
1925 thumbnail = info_dict.get('thumbnail')
1926 if thumbnail:
1927 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1928 if thumbnails:
1929 thumbnails.sort(key=lambda t: (
1930 t.get('preference') if t.get('preference') is not None else -1,
1931 t.get('width') if t.get('width') is not None else -1,
1932 t.get('height') if t.get('height') is not None else -1,
1933 t.get('id') if t.get('id') is not None else '',
1934 t.get('url')))
1935 for i, t in enumerate(thumbnails):
1936 t['url'] = sanitize_url(t['url'])
1937 if t.get('width') and t.get('height'):
1938 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1939 if t.get('id') is None:
1940 t['id'] = '%d' % i
1941
dd82ffea
JMF
1942 def process_video_result(self, info_dict, download=True):
1943 assert info_dict.get('_type', 'video') == 'video'
1944
bec1fad2
PH
1945 if 'id' not in info_dict:
1946 raise ExtractorError('Missing "id" field in extractor result')
1947 if 'title' not in info_dict:
1948 raise ExtractorError('Missing "title" field in extractor result')
1949
c9969434
S
1950 def report_force_conversion(field, field_not, conversion):
1951 self.report_warning(
1952 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1953 % (field, field_not, conversion))
1954
1955 def sanitize_string_field(info, string_field):
1956 field = info.get(string_field)
1957 if field is None or isinstance(field, compat_str):
1958 return
1959 report_force_conversion(string_field, 'a string', 'string')
1960 info[string_field] = compat_str(field)
1961
1962 def sanitize_numeric_fields(info):
1963 for numeric_field in self._NUMERIC_FIELDS:
1964 field = info.get(numeric_field)
1965 if field is None or isinstance(field, compat_numeric_types):
1966 continue
1967 report_force_conversion(numeric_field, 'numeric', 'int')
1968 info[numeric_field] = int_or_none(field)
1969
1970 sanitize_string_field(info_dict, 'id')
1971 sanitize_numeric_fields(info_dict)
be6217b2 1972
dd82ffea
JMF
1973 if 'playlist' not in info_dict:
1974 # It isn't part of a playlist
1975 info_dict['playlist'] = None
1976 info_dict['playlist_index'] = None
1977
bc516a3f 1978 self._sanitize_thumbnails(info_dict)
d5519808 1979
b7b72db9 1980 if self.params.get('list_thumbnails'):
1981 self.list_thumbnails(info_dict)
1982 return
1983
536a55da 1984 thumbnail = info_dict.get('thumbnail')
bc516a3f 1985 thumbnails = info_dict.get('thumbnails')
536a55da
S
1986 if thumbnail:
1987 info_dict['thumbnail'] = sanitize_url(thumbnail)
1988 elif thumbnails:
d5519808
PH
1989 info_dict['thumbnail'] = thumbnails[-1]['url']
1990
c9ae7b95 1991 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1992 info_dict['display_id'] = info_dict['id']
1993
10db0d2f 1994 for ts_key, date_key in (
1995 ('timestamp', 'upload_date'),
1996 ('release_timestamp', 'release_date'),
1997 ):
1998 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1999 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2000 # see http://bugs.python.org/issue1646728)
2001 try:
2002 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2003 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2004 except (ValueError, OverflowError, OSError):
2005 pass
9d2ecdbc 2006
33d2fc2f
S
2007 # Auto generate title fields corresponding to the *_number fields when missing
2008 # in order to always have clean titles. This is very common for TV series.
2009 for field in ('chapter', 'season', 'episode'):
2010 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2011 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2012
05108a49
S
2013 for cc_kind in ('subtitles', 'automatic_captions'):
2014 cc = info_dict.get(cc_kind)
2015 if cc:
2016 for _, subtitle in cc.items():
2017 for subtitle_format in subtitle:
2018 if subtitle_format.get('url'):
2019 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2020 if subtitle_format.get('ext') is None:
2021 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2022
2023 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2024 subtitles = info_dict.get('subtitles')
4bba3716 2025
a504ced0 2026 if self.params.get('listsubtitles', False):
360e1ca5 2027 if 'automatic_captions' in info_dict:
05108a49
S
2028 self.list_subtitles(
2029 info_dict['id'], automatic_captions, 'automatic captions')
4bba3716 2030 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
a504ced0 2031 return
05108a49 2032
360e1ca5 2033 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2034 info_dict['id'], subtitles, automatic_captions)
a504ced0 2035
dd82ffea
JMF
2036 # We now pick which formats have to be downloaded
2037 if info_dict.get('formats') is None:
2038 # There's only one format available
2039 formats = [info_dict]
2040 else:
2041 formats = info_dict['formats']
2042
db95dc13 2043 if not formats:
b7da73eb 2044 if not self.params.get('ignore_no_formats_error'):
2045 raise ExtractorError('No video formats found!')
2046 else:
2047 self.report_warning('No video formats found!')
db95dc13 2048
73af5cc8
S
2049 def is_wellformed(f):
2050 url = f.get('url')
a5ac0c47 2051 if not url:
73af5cc8
S
2052 self.report_warning(
2053 '"url" field is missing or empty - skipping format, '
2054 'there is an error in extractor')
a5ac0c47
S
2055 return False
2056 if isinstance(url, bytes):
2057 sanitize_string_field(f, 'url')
2058 return True
73af5cc8
S
2059
2060 # Filter out malformed formats for better extraction robustness
2061 formats = list(filter(is_wellformed, formats))
2062
181c7053
S
2063 formats_dict = {}
2064
dd82ffea 2065 # We check that all the formats have the format and format_id fields
db95dc13 2066 for i, format in enumerate(formats):
c9969434
S
2067 sanitize_string_field(format, 'format_id')
2068 sanitize_numeric_fields(format)
dcf77cf1 2069 format['url'] = sanitize_url(format['url'])
e74e3b63 2070 if not format.get('format_id'):
8016c922 2071 format['format_id'] = compat_str(i)
e2effb08
S
2072 else:
2073 # Sanitize format_id from characters used in format selector expression
ec85ded8 2074 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2075 format_id = format['format_id']
2076 if format_id not in formats_dict:
2077 formats_dict[format_id] = []
2078 formats_dict[format_id].append(format)
2079
2080 # Make sure all formats have unique format_id
2081 for format_id, ambiguous_formats in formats_dict.items():
2082 if len(ambiguous_formats) > 1:
2083 for i, format in enumerate(ambiguous_formats):
2084 format['format_id'] = '%s-%d' % (format_id, i)
2085
2086 for i, format in enumerate(formats):
8c51aa65 2087 if format.get('format') is None:
6febd1c1 2088 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2089 id=format['format_id'],
2090 res=self.format_resolution(format),
6febd1c1 2091 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 2092 )
c1002e96 2093 # Automatically determine file extension if missing
5b1d8575 2094 if format.get('ext') is None:
cce929ea 2095 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2096 # Automatically determine protocol if missing (useful for format
2097 # selection purposes)
6f0be937 2098 if format.get('protocol') is None:
b5559424 2099 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2100 # Add HTTP headers, so that external programs can use them from the
2101 # json output
2102 full_format_info = info_dict.copy()
2103 full_format_info.update(format)
2104 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2105 # Remove private housekeeping stuff
2106 if '__x_forwarded_for_ip' in info_dict:
2107 del info_dict['__x_forwarded_for_ip']
dd82ffea 2108
4bcc7bd1 2109 # TODO Central sorting goes here
99e206d5 2110
b7da73eb 2111 if formats and formats[0] is not info_dict:
b3d9ef88
JMF
2112 # only set the 'formats' fields if the original info_dict list them
2113 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2114 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2115 # which can't be exported to json
b3d9ef88 2116 info_dict['formats'] = formats
4ec82a72 2117
2118 info_dict, _ = self.pre_process(info_dict)
2119
cfb56d1a 2120 if self.params.get('listformats'):
b7da73eb 2121 if not info_dict.get('formats'):
2122 raise ExtractorError('No video formats found', expected=True)
bfaae0a7 2123 self.list_formats(info_dict)
2124 return
2125
de3ef3ed 2126 req_format = self.params.get('format')
a9c58ad9 2127 if req_format is None:
0017d9ad 2128 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2129 self.write_debug('Default format spec: %s' % req_format)
0017d9ad 2130
5acfa126 2131 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2132
2133 # While in format selection we may need to have an access to the original
2134 # format set in order to calculate some metrics or do some processing.
2135 # For now we need to be able to guess whether original formats provided
2136 # by extractor are incomplete or not (i.e. whether extractor provides only
2137 # video-only or audio-only formats) for proper formats selection for
2138 # extractors with such incomplete formats (see
067aa17e 2139 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2140 # Since formats may be filtered during format selection and may not match
2141 # the original formats the results may be incorrect. Thus original formats
2142 # or pre-calculated metrics should be passed to format selection routines
2143 # as well.
2144 # We will pass a context object containing all necessary additional data
2145 # instead of just formats.
2146 # This fixes incorrect format selection issue (see
067aa17e 2147 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2148 incomplete_formats = (
317f7ab6 2149 # All formats are video-only or
3089bc74 2150 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2151 # all formats are audio-only
3089bc74 2152 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2153
2154 ctx = {
2155 'formats': formats,
2156 'incomplete_formats': incomplete_formats,
2157 }
2158
2159 formats_to_download = list(format_selector(ctx))
dd82ffea 2160 if not formats_to_download:
b7da73eb 2161 if not self.params.get('ignore_no_formats_error'):
2162 raise ExtractorError('Requested format is not available', expected=True)
2163 else:
2164 self.report_warning('Requested format is not available')
2165 elif download:
2166 self.to_screen(
07cce701 2167 '[info] %s: Downloading %d format(s): %s' % (
2168 info_dict['id'], len(formats_to_download),
2169 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2170 for fmt in formats_to_download:
dd82ffea 2171 new_info = dict(info_dict)
4ec82a72 2172 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2173 new_info['__original_infodict'] = info_dict
b7da73eb 2174 new_info.update(fmt)
dd82ffea
JMF
2175 self.process_info(new_info)
2176 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2177 if formats_to_download:
2178 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2179 return info_dict
2180
98c70d6f 2181 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2182 """Select the requested subtitles and their format"""
98c70d6f
JMF
2183 available_subs = {}
2184 if normal_subtitles and self.params.get('writesubtitles'):
2185 available_subs.update(normal_subtitles)
2186 if automatic_captions and self.params.get('writeautomaticsub'):
2187 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2188 if lang not in available_subs:
2189 available_subs[lang] = cap_info
2190
4d171848
JMF
2191 if (not self.params.get('writesubtitles') and not
2192 self.params.get('writeautomaticsub') or not
2193 available_subs):
2194 return None
a504ced0 2195
c32b0aab 2196 all_sub_langs = available_subs.keys()
a504ced0 2197 if self.params.get('allsubtitles', False):
c32b0aab 2198 requested_langs = all_sub_langs
2199 elif self.params.get('subtitleslangs', False):
2200 requested_langs = set()
2201 for lang in self.params.get('subtitleslangs'):
2202 if lang == 'all':
2203 requested_langs.update(all_sub_langs)
2204 continue
2205 discard = lang[0] == '-'
2206 if discard:
2207 lang = lang[1:]
2208 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2209 if discard:
2210 for lang in current_langs:
2211 requested_langs.discard(lang)
2212 else:
2213 requested_langs.update(current_langs)
2214 elif 'en' in available_subs:
2215 requested_langs = ['en']
a504ced0 2216 else:
c32b0aab 2217 requested_langs = [list(all_sub_langs)[0]]
e632bce2 2218 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2219
2220 formats_query = self.params.get('subtitlesformat', 'best')
2221 formats_preference = formats_query.split('/') if formats_query else []
2222 subs = {}
2223 for lang in requested_langs:
2224 formats = available_subs.get(lang)
2225 if formats is None:
2226 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2227 continue
a504ced0
JMF
2228 for ext in formats_preference:
2229 if ext == 'best':
2230 f = formats[-1]
2231 break
2232 matches = list(filter(lambda f: f['ext'] == ext, formats))
2233 if matches:
2234 f = matches[-1]
2235 break
2236 else:
2237 f = formats[-1]
2238 self.report_warning(
2239 'No subtitle format found matching "%s" for language %s, '
2240 'using %s' % (formats_query, lang, f['ext']))
2241 subs[lang] = f
2242 return subs
2243
d06daf23 2244 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2245 def print_mandatory(field, actual_field=None):
2246 if actual_field is None:
2247 actual_field = field
d06daf23 2248 if (self.params.get('force%s' % field, False)
53c18592 2249 and (not incomplete or info_dict.get(actual_field) is not None)):
2250 self.to_stdout(info_dict[actual_field])
d06daf23
S
2251
2252 def print_optional(field):
2253 if (self.params.get('force%s' % field, False)
2254 and info_dict.get(field) is not None):
2255 self.to_stdout(info_dict[field])
2256
53c18592 2257 info_dict = info_dict.copy()
2258 if filename is not None:
2259 info_dict['filename'] = filename
2260 if info_dict.get('requested_formats') is not None:
2261 # For RTMP URLs, also include the playpath
2262 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2263 elif 'url' in info_dict:
2264 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2265
2266 for tmpl in self.params.get('forceprint', []):
2267 if re.match(r'\w+$', tmpl):
2268 tmpl = '%({})s'.format(tmpl)
2269 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2270 self.to_stdout(tmpl % info_copy)
2271
d06daf23
S
2272 print_mandatory('title')
2273 print_mandatory('id')
53c18592 2274 print_mandatory('url', 'urls')
d06daf23
S
2275 print_optional('thumbnail')
2276 print_optional('description')
53c18592 2277 print_optional('filename')
d06daf23
S
2278 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2279 self.to_stdout(formatSeconds(info_dict['duration']))
2280 print_mandatory('format')
53c18592 2281
d06daf23 2282 if self.params.get('forcejson', False):
277d6ff5 2283 self.post_extract(info_dict)
75d43ca0 2284 self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2285
e8e73840 2286 def dl(self, name, info, subtitle=False, test=False):
2287
2288 if test:
2289 verbose = self.params.get('verbose')
2290 params = {
2291 'test': True,
2292 'quiet': not verbose,
2293 'verbose': verbose,
2294 'noprogress': not verbose,
2295 'nopart': True,
2296 'skip_unavailable_fragments': False,
2297 'keep_fragments': False,
2298 'overwrites': True,
2299 '_no_ytdl_file': True,
2300 }
2301 else:
2302 params = self.params
2303 fd = get_suitable_downloader(info, params)(self, params)
2304 if not test:
2305 for ph in self._progress_hooks:
2306 fd.add_progress_hook(ph)
18e674b4 2307 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2308 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2309 new_info = dict(info)
2310 if new_info.get('http_headers') is None:
2311 new_info['http_headers'] = self._calc_headers(new_info)
2312 return fd.download(name, new_info, subtitle)
2313
8222d8de
JMF
2314 def process_info(self, info_dict):
2315 """Process a single resolved IE result."""
2316
2317 assert info_dict.get('_type', 'video') == 'video'
fd288278 2318
0202b52a 2319 info_dict.setdefault('__postprocessors', [])
2320
fd288278
PH
2321 max_downloads = self.params.get('max_downloads')
2322 if max_downloads is not None:
2323 if self._num_downloads >= int(max_downloads):
2324 raise MaxDownloadsReached()
8222d8de 2325
d06daf23 2326 # TODO: backward compatibility, to be removed
8222d8de 2327 info_dict['fulltitle'] = info_dict['title']
8222d8de 2328
11b85ce6 2329 if 'format' not in info_dict:
8222d8de
JMF
2330 info_dict['format'] = info_dict['ext']
2331
c77495e3 2332 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2333 return
2334
277d6ff5 2335 self.post_extract(info_dict)
fd288278 2336 self._num_downloads += 1
8222d8de 2337
dcf64d43 2338 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2339 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2340 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2341 files_to_move = {}
8222d8de
JMF
2342
2343 # Forced printings
0202b52a 2344 self.__forced_printings(info_dict, full_filename, incomplete=False)
8222d8de 2345
8222d8de 2346 if self.params.get('simulate', False):
2d30509f 2347 if self.params.get('force_write_download_archive', False):
2348 self.record_download_archive(info_dict)
2349
2350 # Do nothing else if in simulate mode
8222d8de
JMF
2351 return
2352
de6000d9 2353 if full_filename is None:
8222d8de
JMF
2354 return
2355
e92caff5 2356 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2357 return
e92caff5 2358 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2359 return
2360
2361 if self.params.get('writedescription', False):
de6000d9 2362 descfn = self.prepare_filename(info_dict, 'description')
e92caff5 2363 if not self._ensure_dir_exists(encodeFilename(descfn)):
0202b52a 2364 return
0c3d0f51 2365 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2366 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2367 elif info_dict.get('description') is None:
2368 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2369 else:
2370 try:
6febd1c1 2371 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2372 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2373 descfile.write(info_dict['description'])
7b6fefc9 2374 except (OSError, IOError):
6febd1c1 2375 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2376 return
8222d8de 2377
1fb07d10 2378 if self.params.get('writeannotations', False):
de6000d9 2379 annofn = self.prepare_filename(info_dict, 'annotation')
e92caff5 2380 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2381 return
0c3d0f51 2382 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2383 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2384 elif not info_dict.get('annotations'):
2385 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2386 else:
2387 try:
6febd1c1 2388 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2389 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2390 annofile.write(info_dict['annotations'])
2391 except (KeyError, TypeError):
6febd1c1 2392 self.report_warning('There are no annotations to write.')
7b6fefc9 2393 except (OSError, IOError):
6febd1c1 2394 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2395 return
1fb07d10 2396
c4a91be7 2397 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2398 self.params.get('writeautomaticsub')])
c4a91be7 2399
c84dd8a9 2400 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2401 # subtitles download errors are already managed as troubles in relevant IE
2402 # that way it will silently go on when used with unsupporting IE
c84dd8a9 2403 subtitles = info_dict['requested_subtitles']
fa57af1e 2404 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2405 for sub_lang, sub_info in subtitles.items():
2406 sub_format = sub_info['ext']
56d868db 2407 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2408 sub_filename_final = subtitles_filename(
2409 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2410 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2411 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
dcf64d43 2412 sub_info['filepath'] = sub_filename
0202b52a 2413 files_to_move[sub_filename] = sub_filename_final
a504ced0 2414 else:
0c9df79e 2415 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2416 if sub_info.get('data') is not None:
2417 try:
2418 # Use newline='' to prevent conversion of newline characters
067aa17e 2419 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2420 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2421 subfile.write(sub_info['data'])
dcf64d43 2422 sub_info['filepath'] = sub_filename
0202b52a 2423 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2424 except (OSError, IOError):
2425 self.report_error('Cannot write subtitles file ' + sub_filename)
2426 return
7b6fefc9 2427 else:
5ff1bc0c 2428 try:
e8e73840 2429 self.dl(sub_filename, sub_info.copy(), subtitle=True)
dcf64d43 2430 sub_info['filepath'] = sub_filename
0202b52a 2431 files_to_move[sub_filename] = sub_filename_final
fe346461 2432 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
5ff1bc0c
RA
2433 self.report_warning('Unable to download subtitle for "%s": %s' %
2434 (sub_lang, error_to_compat_str(err)))
2435 continue
8222d8de 2436
8222d8de 2437 if self.params.get('writeinfojson', False):
de6000d9 2438 infofn = self.prepare_filename(info_dict, 'infojson')
e92caff5 2439 if not self._ensure_dir_exists(encodeFilename(infofn)):
0202b52a 2440 return
0c3d0f51 2441 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2442 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2443 else:
66c935fb 2444 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2445 try:
75d43ca0 2446 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
7b6fefc9 2447 except (OSError, IOError):
66c935fb 2448 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2449 return
de6000d9 2450 info_dict['__infojson_filename'] = infofn
8222d8de 2451
56d868db 2452 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2453 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2454 thumb_filename = replace_extension(
2455 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
dcf64d43 2456 files_to_move[thumb_filename_temp] = thumb_filename
8222d8de 2457
732044af 2458 # Write internet shortcut files
2459 url_link = webloc_link = desktop_link = False
2460 if self.params.get('writelink', False):
2461 if sys.platform == "darwin": # macOS.
2462 webloc_link = True
2463 elif sys.platform.startswith("linux"):
2464 desktop_link = True
2465 else: # if sys.platform in ['win32', 'cygwin']:
2466 url_link = True
2467 if self.params.get('writeurllink', False):
2468 url_link = True
2469 if self.params.get('writewebloclink', False):
2470 webloc_link = True
2471 if self.params.get('writedesktoplink', False):
2472 desktop_link = True
2473
2474 if url_link or webloc_link or desktop_link:
2475 if 'webpage_url' not in info_dict:
2476 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2477 return
2478 ascii_url = iri_to_uri(info_dict['webpage_url'])
2479
2480 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2481 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2482 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2483 self.to_screen('[info] Internet shortcut is already present')
2484 else:
2485 try:
2486 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2487 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2488 template_vars = {'url': ascii_url}
2489 if embed_filename:
2490 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2491 linkfile.write(template % template_vars)
2492 except (OSError, IOError):
2493 self.report_error('Cannot write internet shortcut ' + linkfn)
2494 return False
2495 return True
2496
2497 if url_link:
2498 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2499 return
2500 if webloc_link:
2501 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2502 return
2503 if desktop_link:
2504 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2505 return
2506
56d868db 2507 try:
2508 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2509 except PostProcessingError as err:
2510 self.report_error('Preprocessing: %s' % str(err))
2511 return
2512
732044af 2513 must_record_download_archive = False
56d868db 2514 if self.params.get('skip_download', False):
2515 info_dict['filepath'] = temp_filename
2516 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2517 info_dict['__files_to_move'] = files_to_move
2518 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2519 else:
2520 # Download
4340deca 2521 try:
0202b52a 2522
6b591b29 2523 def existing_file(*filepaths):
2524 ext = info_dict.get('ext')
2525 final_ext = self.params.get('final_ext', ext)
2526 existing_files = []
2527 for file in orderedSet(filepaths):
2528 if final_ext != ext:
2529 converted = replace_extension(file, final_ext, ext)
2530 if os.path.exists(encodeFilename(converted)):
2531 existing_files.append(converted)
2532 if os.path.exists(encodeFilename(file)):
2533 existing_files.append(file)
2534
2535 if not existing_files or self.params.get('overwrites', False):
2536 for file in orderedSet(existing_files):
2537 self.report_file_delete(file)
2538 os.remove(encodeFilename(file))
2539 return None
2540
2541 self.report_file_already_downloaded(existing_files[0])
2542 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2543 return existing_files[0]
0202b52a 2544
2545 success = True
4340deca 2546 if info_dict.get('requested_formats') is not None:
81cd954a
S
2547
2548 def compatible_formats(formats):
d03cfdce 2549 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2550 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2551 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2552 if len(video_formats) > 2 or len(audio_formats) > 2:
2553 return False
2554
81cd954a 2555 # Check extension
d03cfdce 2556 exts = set(format.get('ext') for format in formats)
2557 COMPATIBLE_EXTS = (
2558 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2559 set(('webm',)),
2560 )
2561 for ext_sets in COMPATIBLE_EXTS:
2562 if ext_sets.issuperset(exts):
2563 return True
81cd954a
S
2564 # TODO: Check acodec/vcodec
2565 return False
2566
2567 requested_formats = info_dict['requested_formats']
0202b52a 2568 old_ext = info_dict['ext']
4d971a16 2569 if self.params.get('merge_output_format') is None:
2570 if not compatible_formats(requested_formats):
2571 info_dict['ext'] = 'mkv'
2572 self.report_warning(
2573 'Requested formats are incompatible for merge and will be merged into mkv.')
2574 if (info_dict['ext'] == 'webm'
2575 and self.params.get('writethumbnail', False)
2576 and info_dict.get('thumbnails')):
2577 info_dict['ext'] = 'mkv'
2578 self.report_warning(
2579 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
0202b52a 2580
2581 def correct_ext(filename):
2582 filename_real_ext = os.path.splitext(filename)[1][1:]
2583 filename_wo_ext = (
2584 os.path.splitext(filename)[0]
2585 if filename_real_ext == old_ext
2586 else filename)
2587 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2588
38c6902b 2589 # Ensure filename always has a correct extension for successful merge
0202b52a 2590 full_filename = correct_ext(full_filename)
2591 temp_filename = correct_ext(temp_filename)
2592 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2593 info_dict['__real_download'] = False
18e674b4 2594
2595 _protocols = set(determine_protocol(f) for f in requested_formats)
2596 if len(_protocols) == 1:
2597 info_dict['protocol'] = _protocols.pop()
2598 directly_mergable = (
2599 'no-direct-merge' not in self.params.get('compat_opts', [])
2600 and info_dict.get('protocol') is not None # All requested formats have same protocol
2601 and not self.params.get('allow_unplayable_formats')
2602 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2603 if directly_mergable:
2604 info_dict['url'] = requested_formats[0]['url']
2605 # Treat it as a single download
2606 dl_filename = existing_file(full_filename, temp_filename)
2607 if dl_filename is None:
2608 success, real_download = self.dl(temp_filename, info_dict)
2609 info_dict['__real_download'] = real_download
2610 else:
2611 downloaded = []
2612 merger = FFmpegMergerPP(self)
2613 if self.params.get('allow_unplayable_formats'):
2614 self.report_warning(
2615 'You have requested merging of multiple formats '
2616 'while also allowing unplayable formats to be downloaded. '
2617 'The formats won\'t be merged to prevent data corruption.')
2618 elif not merger.available:
2619 self.report_warning(
2620 'You have requested merging of multiple formats but ffmpeg is not installed. '
2621 'The formats won\'t be merged.')
2622
2623 if dl_filename is None:
2624 for f in requested_formats:
2625 new_info = dict(info_dict)
2626 del new_info['requested_formats']
2627 new_info.update(f)
2628 fname = prepend_extension(
2629 self.prepare_filename(new_info, 'temp'),
2630 'f%s' % f['format_id'], new_info['ext'])
2631 if not self._ensure_dir_exists(fname):
2632 return
2633 downloaded.append(fname)
2634 partial_success, real_download = self.dl(fname, new_info)
2635 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2636 success = success and partial_success
2637 if merger.available and not self.params.get('allow_unplayable_formats'):
2638 info_dict['__postprocessors'].append(merger)
2639 info_dict['__files_to_merge'] = downloaded
2640 # Even if there were no downloads, it is being merged only now
2641 info_dict['__real_download'] = True
2642 else:
2643 for file in downloaded:
2644 files_to_move[file] = None
4340deca
P
2645 else:
2646 # Just a single file
0202b52a 2647 dl_filename = existing_file(full_filename, temp_filename)
2648 if dl_filename is None:
e8e73840 2649 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2650 info_dict['__real_download'] = real_download
2651
0202b52a 2652 dl_filename = dl_filename or temp_filename
c571435f 2653 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2654
3158150c 2655 except network_exceptions as err:
7960b056 2656 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2657 return
2658 except (OSError, IOError) as err:
2659 raise UnavailableVideoError(err)
2660 except (ContentTooShortError, ) as err:
2661 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2662 return
8222d8de 2663
de6000d9 2664 if success and full_filename != '-':
6271f1ca 2665 # Fixup content
62cd676c
PH
2666 fixup_policy = self.params.get('fixup')
2667 if fixup_policy is None:
2668 fixup_policy = 'detect_or_warn'
2669
e4172ac9 2670 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
d1e4a464 2671
6271f1ca
PH
2672 stretched_ratio = info_dict.get('stretched_ratio')
2673 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
2674 if fixup_policy == 'warn':
2675 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2676 info_dict['id'], stretched_ratio))
2677 elif fixup_policy == 'detect_or_warn':
2678 stretched_pp = FFmpegFixupStretchedPP(self)
2679 if stretched_pp.available:
6271f1ca
PH
2680 info_dict['__postprocessors'].append(stretched_pp)
2681 else:
2682 self.report_warning(
d1e4a464
S
2683 '%s: Non-uniform pixel ratio (%s). %s'
2684 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 2685 else:
62cd676c
PH
2686 assert fixup_policy in ('ignore', 'never')
2687
3089bc74 2688 if (info_dict.get('requested_formats') is None
6b591b29 2689 and info_dict.get('container') == 'm4a_dash'
2690 and info_dict.get('ext') == 'm4a'):
62cd676c 2691 if fixup_policy == 'warn':
d1e4a464
S
2692 self.report_warning(
2693 '%s: writing DASH m4a. '
2694 'Only some players support this container.'
2695 % info_dict['id'])
62cd676c
PH
2696 elif fixup_policy == 'detect_or_warn':
2697 fixup_pp = FFmpegFixupM4aPP(self)
2698 if fixup_pp.available:
62cd676c
PH
2699 info_dict['__postprocessors'].append(fixup_pp)
2700 else:
2701 self.report_warning(
d1e4a464
S
2702 '%s: writing DASH m4a. '
2703 'Only some players support this container. %s'
2704 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
2705 else:
2706 assert fixup_policy in ('ignore', 'never')
6271f1ca 2707
0a473f2f 2708 if ('protocol' in info_dict
2709 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
f17f8651 2710 if fixup_policy == 'warn':
a02682fd 2711 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 2712 info_dict['id']))
2713 elif fixup_policy == 'detect_or_warn':
2714 fixup_pp = FFmpegFixupM3u8PP(self)
2715 if fixup_pp.available:
f17f8651 2716 info_dict['__postprocessors'].append(fixup_pp)
2717 else:
2718 self.report_warning(
a02682fd 2719 '%s: malformed AAC bitstream detected. %s'
d1e4a464 2720 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 2721 else:
2722 assert fixup_policy in ('ignore', 'never')
2723
8222d8de 2724 try:
23c1a667 2725 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2726 except PostProcessingError as err:
2727 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2728 return
ab8e5e51
AM
2729 try:
2730 for ph in self._post_hooks:
23c1a667 2731 ph(info_dict['filepath'])
ab8e5e51
AM
2732 except Exception as err:
2733 self.report_error('post hooks: %s' % str(err))
2734 return
2d30509f 2735 must_record_download_archive = True
2736
2737 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2738 self.record_download_archive(info_dict)
c3e6ffba 2739 max_downloads = self.params.get('max_downloads')
2740 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2741 raise MaxDownloadsReached()
8222d8de
JMF
2742
2743 def download(self, url_list):
2744 """Download a given list of URLs."""
de6000d9 2745 outtmpl = self.outtmpl_dict['default']
3089bc74
S
2746 if (len(url_list) > 1
2747 and outtmpl != '-'
2748 and '%' not in outtmpl
2749 and self.params.get('max_downloads') != 1):
acd69589 2750 raise SameFileError(outtmpl)
8222d8de
JMF
2751
2752 for url in url_list:
2753 try:
5f6a1245 2754 # It also downloads the videos
61aa5ba3
S
2755 res = self.extract_info(
2756 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2757 except UnavailableVideoError:
6febd1c1 2758 self.report_error('unable to download video')
8222d8de 2759 except MaxDownloadsReached:
8b0d7497 2760 self.to_screen('[info] Maximum number of downloaded files reached')
2761 raise
2762 except ExistingVideoReached:
d83cb531 2763 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2764 raise
2765 except RejectedVideoReached:
d83cb531 2766 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
8222d8de 2767 raise
63e0be34
PH
2768 else:
2769 if self.params.get('dump_single_json', False):
277d6ff5 2770 self.post_extract(res)
75d43ca0 2771 self.to_stdout(json.dumps(res, default=repr))
8222d8de
JMF
2772
2773 return self._download_retcode
2774
1dcc4c0c 2775 def download_with_info_file(self, info_filename):
31bd3925
JMF
2776 with contextlib.closing(fileinput.FileInput(
2777 [info_filename], mode='r',
2778 openhook=fileinput.hook_encoded('utf-8'))) as f:
2779 # FileInput doesn't have a read method, we can't call json.load
498f5606 2780 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898
JMF
2781 try:
2782 self.process_ie_result(info, download=True)
498f5606 2783 except (DownloadError, EntryNotInPlaylist):
d4943898
JMF
2784 webpage_url = info.get('webpage_url')
2785 if webpage_url is not None:
6febd1c1 2786 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
2787 return self.download([webpage_url])
2788 else:
2789 raise
2790 return self._download_retcode
1dcc4c0c 2791
cb202fd2 2792 @staticmethod
75d43ca0 2793 def filter_requested_info(info_dict, actually_filter=True):
4ec82a72 2794 info_dict.pop('__original_infodict', None) # Always remove this
75d43ca0 2795 if not actually_filter:
394dcd44 2796 info_dict['epoch'] = int(time.time())
75d43ca0 2797 return info_dict
5226731e 2798 exceptions = {
498f5606 2799 'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
5226731e 2800 'keep': ['_type'],
2801 }
2802 keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2803 filter_fn = lambda obj: (
a515a78d 2804 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2805 else obj if not isinstance(obj, dict)
2806 else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
5226731e 2807 return filter_fn(info_dict)
cb202fd2 2808
dcf64d43 2809 def run_pp(self, pp, infodict):
5bfa4862 2810 files_to_delete = []
dcf64d43 2811 if '__files_to_move' not in infodict:
2812 infodict['__files_to_move'] = {}
af819c21 2813 files_to_delete, infodict = pp.run(infodict)
5bfa4862 2814 if not files_to_delete:
dcf64d43 2815 return infodict
5bfa4862 2816
2817 if self.params.get('keepvideo', False):
2818 for f in files_to_delete:
dcf64d43 2819 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2820 else:
2821 for old_filename in set(files_to_delete):
2822 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2823 try:
2824 os.remove(encodeFilename(old_filename))
2825 except (IOError, OSError):
2826 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2827 if old_filename in infodict['__files_to_move']:
2828 del infodict['__files_to_move'][old_filename]
2829 return infodict
5bfa4862 2830
277d6ff5 2831 @staticmethod
2832 def post_extract(info_dict):
2833 def actual_post_extract(info_dict):
2834 if info_dict.get('_type') in ('playlist', 'multi_video'):
2835 for video_dict in info_dict.get('entries', {}):
b050d210 2836 actual_post_extract(video_dict or {})
277d6ff5 2837 return
2838
07cce701 2839 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2840 extra = post_extractor().items()
2841 info_dict.update(extra)
07cce701 2842 info_dict.pop('__post_extractor', None)
277d6ff5 2843
4ec82a72 2844 original_infodict = info_dict.get('__original_infodict') or {}
2845 original_infodict.update(extra)
2846 original_infodict.pop('__post_extractor', None)
2847
b050d210 2848 actual_post_extract(info_dict or {})
277d6ff5 2849
56d868db 2850 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 2851 info = dict(ie_info)
56d868db 2852 info['__files_to_move'] = files_to_move or {}
2853 for pp in self._pps[key]:
dcf64d43 2854 info = self.run_pp(pp, info)
56d868db 2855 return info, info.pop('__files_to_move', None)
5bfa4862 2856
dcf64d43 2857 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de
JMF
2858 """Run all the postprocessors on the given file."""
2859 info = dict(ie_info)
2860 info['filepath'] = filename
dcf64d43 2861 info['__files_to_move'] = files_to_move or {}
0202b52a 2862
56d868db 2863 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 2864 info = self.run_pp(pp, info)
2865 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2866 del info['__files_to_move']
56d868db 2867 for pp in self._pps['after_move']:
dcf64d43 2868 info = self.run_pp(pp, info)
23c1a667 2869 return info
c1c9a79c 2870
5db07df6 2871 def _make_archive_id(self, info_dict):
e9fef7ee
S
2872 video_id = info_dict.get('id')
2873 if not video_id:
2874 return
5db07df6
PH
2875 # Future-proof against any change in case
2876 # and backwards compatibility with prior versions
e9fef7ee 2877 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2878 if extractor is None:
1211bb6d
S
2879 url = str_or_none(info_dict.get('url'))
2880 if not url:
2881 return
e9fef7ee
S
2882 # Try to find matching extractor for the URL and take its ie_key
2883 for ie in self._ies:
1211bb6d 2884 if ie.suitable(url):
e9fef7ee
S
2885 extractor = ie.ie_key()
2886 break
2887 else:
2888 return
d0757229 2889 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2890
2891 def in_download_archive(self, info_dict):
2892 fn = self.params.get('download_archive')
2893 if fn is None:
2894 return False
2895
2896 vid_id = self._make_archive_id(info_dict)
e9fef7ee 2897 if not vid_id:
7012b23c 2898 return False # Incomplete video information
5db07df6 2899
a45e8619 2900 return vid_id in self.archive
c1c9a79c
PH
2901
2902 def record_download_archive(self, info_dict):
2903 fn = self.params.get('download_archive')
2904 if fn is None:
2905 return
5db07df6
PH
2906 vid_id = self._make_archive_id(info_dict)
2907 assert vid_id
c1c9a79c 2908 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 2909 archive_file.write(vid_id + '\n')
a45e8619 2910 self.archive.add(vid_id)
dd82ffea 2911
8c51aa65 2912 @staticmethod
8abeeb94 2913 def format_resolution(format, default='unknown'):
fb04e403
PH
2914 if format.get('vcodec') == 'none':
2915 return 'audio only'
f49d89ee
PH
2916 if format.get('resolution') is not None:
2917 return format['resolution']
35615307
DA
2918 if format.get('width') and format.get('height'):
2919 res = '%dx%d' % (format['width'], format['height'])
2920 elif format.get('height'):
2921 res = '%sp' % format['height']
2922 elif format.get('width'):
388ae76b 2923 res = '%dx?' % format['width']
8c51aa65 2924 else:
8abeeb94 2925 res = default
8c51aa65
JMF
2926 return res
2927
c57f7757
PH
2928 def _format_note(self, fdict):
2929 res = ''
2930 if fdict.get('ext') in ['f4f', 'f4m']:
2931 res += '(unsupported) '
32f90364
PH
2932 if fdict.get('language'):
2933 if res:
2934 res += ' '
9016d76f 2935 res += '[%s] ' % fdict['language']
c57f7757
PH
2936 if fdict.get('format_note') is not None:
2937 res += fdict['format_note'] + ' '
2938 if fdict.get('tbr') is not None:
2939 res += '%4dk ' % fdict['tbr']
2940 if fdict.get('container') is not None:
2941 if res:
2942 res += ', '
2943 res += '%s container' % fdict['container']
3089bc74
S
2944 if (fdict.get('vcodec') is not None
2945 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2946 if res:
2947 res += ', '
2948 res += fdict['vcodec']
91c7271a 2949 if fdict.get('vbr') is not None:
c57f7757
PH
2950 res += '@'
2951 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2952 res += 'video@'
2953 if fdict.get('vbr') is not None:
2954 res += '%4dk' % fdict['vbr']
fbb21cf5 2955 if fdict.get('fps') is not None:
5d583bdf
S
2956 if res:
2957 res += ', '
2958 res += '%sfps' % fdict['fps']
c57f7757
PH
2959 if fdict.get('acodec') is not None:
2960 if res:
2961 res += ', '
2962 if fdict['acodec'] == 'none':
2963 res += 'video only'
2964 else:
2965 res += '%-5s' % fdict['acodec']
2966 elif fdict.get('abr') is not None:
2967 if res:
2968 res += ', '
2969 res += 'audio'
2970 if fdict.get('abr') is not None:
2971 res += '@%3dk' % fdict['abr']
2972 if fdict.get('asr') is not None:
2973 res += ' (%5dHz)' % fdict['asr']
2974 if fdict.get('filesize') is not None:
2975 if res:
2976 res += ', '
2977 res += format_bytes(fdict['filesize'])
9732d77e
PH
2978 elif fdict.get('filesize_approx') is not None:
2979 if res:
2980 res += ', '
2981 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2982 return res
91c7271a 2983
76d321f6 2984 def _format_note_table(self, f):
2985 def join_fields(*vargs):
2986 return ', '.join((val for val in vargs if val != ''))
2987
2988 return join_fields(
2989 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2990 format_field(f, 'language', '[%s]'),
2991 format_field(f, 'format_note'),
2992 format_field(f, 'container', ignore=(None, f.get('ext'))),
2993 format_field(f, 'asr', '%5dHz'))
2994
c57f7757 2995 def list_formats(self, info_dict):
94badb25 2996 formats = info_dict.get('formats', [info_dict])
53ed7066 2997 new_format = (
2998 'list-formats' not in self.params.get('compat_opts', [])
2999 and self.params.get('list_formats_as_table', True) is not False)
76d321f6 3000 if new_format:
3001 table = [
3002 [
3003 format_field(f, 'format_id'),
3004 format_field(f, 'ext'),
3005 self.format_resolution(f),
3006 format_field(f, 'fps', '%d'),
3007 '|',
3008 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3009 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3010 shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
76d321f6 3011 '|',
3012 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3013 format_field(f, 'vbr', '%4dk'),
3014 format_field(f, 'acodec', default='unknown').replace('none', ''),
3015 format_field(f, 'abr', '%3dk'),
3016 format_field(f, 'asr', '%5dHz'),
3017 self._format_note_table(f)]
3018 for f in formats
3019 if f.get('preference') is None or f['preference'] >= -1000]
3020 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3021 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3022 else:
3023 table = [
3024 [
3025 format_field(f, 'format_id'),
3026 format_field(f, 'ext'),
3027 self.format_resolution(f),
3028 self._format_note(f)]
3029 for f in formats
3030 if f.get('preference') is None or f['preference'] >= -1000]
3031 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3032
cfb56d1a 3033 self.to_screen(
76d321f6 3034 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
3035 header_line,
3036 table,
3037 delim=new_format,
3038 extraGap=(0 if new_format else 1),
3039 hideEmpty=new_format)))
cfb56d1a
PH
3040
3041 def list_thumbnails(self, info_dict):
3042 thumbnails = info_dict.get('thumbnails')
3043 if not thumbnails:
b7b72db9 3044 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3045 return
cfb56d1a
PH
3046
3047 self.to_screen(
3048 '[info] Thumbnails for %s:' % info_dict['id'])
3049 self.to_screen(render_table(
3050 ['ID', 'width', 'height', 'URL'],
3051 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3052
360e1ca5 3053 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3054 if not subtitles:
360e1ca5 3055 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3056 return
a504ced0 3057 self.to_screen(
edab9dbf 3058 'Available %s for %s:' % (name, video_id))
2412044c 3059
3060 def _row(lang, formats):
7aee40c1 3061 exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
2412044c 3062 if len(set(names)) == 1:
7aee40c1 3063 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3064 return [lang, ', '.join(names), ', '.join(exts)]
3065
edab9dbf 3066 self.to_screen(render_table(
2412044c 3067 ['Language', 'Name', 'Formats'],
3068 [_row(lang, formats) for lang, formats in subtitles.items()],
3069 hideEmpty=True))
a504ced0 3070
dca08720
PH
3071 def urlopen(self, req):
3072 """ Start an HTTP download """
82d8a8b6 3073 if isinstance(req, compat_basestring):
67dda517 3074 req = sanitized_Request(req)
19a41fc6 3075 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3076
3077 def print_debug_header(self):
3078 if not self.params.get('verbose'):
3079 return
62fec3b2 3080
4192b51c 3081 if type('') is not compat_str:
067aa17e 3082 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
4192b51c
PH
3083 self.report_warning(
3084 'Your Python is broken! Update to a newer and supported version')
3085
c6afed48
PH
3086 stdout_encoding = getattr(
3087 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3088 encoding_str = (
734f90bb
PH
3089 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3090 locale.getpreferredencoding(),
3091 sys.getfilesystemencoding(),
c6afed48 3092 stdout_encoding,
b0472057 3093 self.get_encoding()))
4192b51c 3094 write_string(encoding_str, encoding=None)
734f90bb 3095
e5813e53 3096 source = (
3097 '(exe)' if hasattr(sys, 'frozen')
3098 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3099 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3100 else '')
3101 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
e0986e31 3102 if _LAZY_LOADER:
f74980cb 3103 self._write_string('[debug] Lazy loading extractors enabled\n')
3104 if _PLUGIN_CLASSES:
3105 self._write_string(
3106 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
53ed7066 3107 if self.params.get('compat_opts'):
3108 self._write_string(
3109 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720
PH
3110 try:
3111 sp = subprocess.Popen(
3112 ['git', 'rev-parse', '--short', 'HEAD'],
3113 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3114 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3115 out, err = process_communicate_or_kill(sp)
dca08720
PH
3116 out = out.decode().strip()
3117 if re.match('[0-9a-f]+', out):
f74980cb 3118 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3119 except Exception:
dca08720
PH
3120 try:
3121 sys.exc_clear()
70a1165b 3122 except Exception:
dca08720 3123 pass
b300cda4
S
3124
3125 def python_implementation():
3126 impl_name = platform.python_implementation()
3127 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3128 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3129 return impl_name
3130
e5813e53 3131 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3132 platform.python_version(),
3133 python_implementation(),
3134 platform.architecture()[0],
b300cda4 3135 platform_name()))
d28b5171 3136
73fac4e9 3137 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3138 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3139 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171
PH
3140 exe_str = ', '.join(
3141 '%s %s' % (exe, v)
3142 for exe, v in sorted(exe_versions.items())
3143 if v
3144 )
3145 if not exe_str:
3146 exe_str = 'none'
3147 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720
PH
3148
3149 proxy_map = {}
3150 for handler in self._opener.handlers:
3151 if hasattr(handler, 'proxies'):
3152 proxy_map.update(handler.proxies)
734f90bb 3153 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3154
58b1f00d
PH
3155 if self.params.get('call_home', False):
3156 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3157 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3158 return
58b1f00d
PH
3159 latest_version = self.urlopen(
3160 'https://yt-dl.org/latest/version').read().decode('utf-8')
3161 if version_tuple(latest_version) > version_tuple(__version__):
3162 self.report_warning(
3163 'You are using an outdated version (newest version: %s)! '
3164 'See https://yt-dl.org/update if you need help updating.' %
3165 latest_version)
3166
e344693b 3167 def _setup_opener(self):
6ad14cab 3168 timeout_val = self.params.get('socket_timeout')
19a41fc6 3169 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3170
dca08720
PH
3171 opts_cookiefile = self.params.get('cookiefile')
3172 opts_proxy = self.params.get('proxy')
3173
3174 if opts_cookiefile is None:
3175 self.cookiejar = compat_cookiejar.CookieJar()
3176 else:
590bc6f6 3177 opts_cookiefile = expand_path(opts_cookiefile)
1bab3437 3178 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
dca08720 3179 if os.access(opts_cookiefile, os.R_OK):
1d88b3e6 3180 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
dca08720 3181
6a3f4c3f 3182 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3183 if opts_proxy is not None:
3184 if opts_proxy == '':
3185 proxies = {}
3186 else:
3187 proxies = {'http': opts_proxy, 'https': opts_proxy}
3188 else:
3189 proxies = compat_urllib_request.getproxies()
067aa17e 3190 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3191 if 'http' in proxies and 'https' not in proxies:
3192 proxies['https'] = proxies['http']
91410c9b 3193 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3194
3195 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3196 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3197 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3198 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3199 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3200
3201 # When passing our own FileHandler instance, build_opener won't add the
3202 # default FileHandler and allows us to disable the file protocol, which
3203 # can be used for malicious purposes (see
067aa17e 3204 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3205 file_handler = compat_urllib_request.FileHandler()
3206
3207 def file_open(*args, **kwargs):
7a5c1cfe 3208 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3209 file_handler.file_open = file_open
3210
3211 opener = compat_urllib_request.build_opener(
fca6dba8 3212 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3213
dca08720
PH
3214 # Delete the default user-agent header, which would otherwise apply in
3215 # cases where our custom HTTP handler doesn't come into play
067aa17e 3216 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3217 opener.addheaders = []
3218 self._opener = opener
62fec3b2
PH
3219
3220 def encode(self, s):
3221 if isinstance(s, bytes):
3222 return s # Already encoded
3223
3224 try:
3225 return s.encode(self.get_encoding())
3226 except UnicodeEncodeError as err:
3227 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3228 raise
3229
3230 def get_encoding(self):
3231 encoding = self.params.get('encoding')
3232 if encoding is None:
3233 encoding = preferredencoding()
3234 return encoding
ec82d85a 3235
de6000d9 3236 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3237 write_all = self.params.get('write_all_thumbnails', False)
3238 thumbnails = []
3239 if write_all or self.params.get('writethumbnail', False):
0202b52a 3240 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3241 multiple = write_all and len(thumbnails) > 1
ec82d85a 3242
0202b52a 3243 ret = []
6c4fd172 3244 for t in thumbnails[::1 if write_all else -1]:
ec82d85a 3245 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3246 suffix = '%s.' % t['id'] if multiple else ''
3247 thumb_display_id = '%s ' % t['id'] if multiple else ''
dcf64d43 3248 t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3249
0c3d0f51 3250 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3251 ret.append(suffix + thumb_ext)
ec82d85a
PH
3252 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3253 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3254 else:
5ef7d9bd 3255 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3256 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3257 try:
3258 uf = self.urlopen(t['url'])
d3d89c32 3259 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3260 shutil.copyfileobj(uf, thumbf)
de6000d9 3261 ret.append(suffix + thumb_ext)
ec82d85a
PH
3262 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3263 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3158150c 3264 except network_exceptions as err:
ec82d85a 3265 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3266 (t['url'], error_to_compat_str(err)))
6c4fd172 3267 if ret and not write_all:
3268 break
0202b52a 3269 return ret