]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
Remove support for obsolete python versions
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import sys
23import time
67134eab 24import tokenize
8222d8de 25import traceback
75822ca7 26import random
8222d8de 27
961ea474 28from string import ascii_letters
e5813e53 29from zipimport import zipimporter
961ea474 30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
ce02ed60 51 determine_ext,
b5559424 52 determine_protocol,
732044af 53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 56 DownloadError,
c0384f22 57 encode_compat_str,
ce02ed60 58 encodeFilename,
498f5606 59 EntryNotInPlaylist,
a06916d9 60 error_to_compat_str,
8b0d7497 61 ExistingVideoReached,
590bc6f6 62 expand_path,
ce02ed60 63 ExtractorError,
e29663c6 64 float_or_none,
02dbf93f 65 format_bytes,
76d321f6 66 format_field,
143db31d 67 FORMAT_RE,
525ef922 68 formatSeconds,
773f291d 69 GeoRestrictedError,
c9969434 70 int_or_none,
732044af 71 iri_to_uri,
773f291d 72 ISO3166Utils,
56a8fb4f 73 LazyList,
ce02ed60 74 locked_file,
0202b52a 75 make_dir,
dca08720 76 make_HTTPS_handler,
ce02ed60 77 MaxDownloadsReached,
3158150c 78 network_exceptions,
cd6fc19e 79 orderedSet,
a06916d9 80 OUTTMPL_TYPES,
b7ab0590 81 PagedList,
083c9df9 82 parse_filesize,
91410c9b 83 PerRequestProxyHandler,
dca08720 84 platform_name,
eedb7ba5 85 PostProcessingError,
ce02ed60 86 preferredencoding,
eedb7ba5 87 prepend_extension,
a06916d9 88 process_communicate_or_kill,
e8e73840 89 random_uuidv4,
51fb4995 90 register_socks_protocols,
a06916d9 91 RejectedVideoReached,
cfb56d1a 92 render_table,
eedb7ba5 93 replace_extension,
ce02ed60
PH
94 SameFileError,
95 sanitize_filename,
1bb5c511 96 sanitize_path,
dcf77cf1 97 sanitize_url,
67dda517 98 sanitized_Request,
e5660ee6 99 std_headers,
1211bb6d 100 str_or_none,
e29663c6 101 strftime_or_none,
ce02ed60 102 subtitles_filename,
732044af 103 to_high_limit_path,
a439a3a4 104 traverse_dict,
ce02ed60 105 UnavailableVideoError,
29eb5174 106 url_basename,
58b1f00d 107 version_tuple,
ce02ed60
PH
108 write_json_file,
109 write_string,
1bab3437 110 YoutubeDLCookieJar,
6a3f4c3f 111 YoutubeDLCookieProcessor,
dca08720 112 YoutubeDLHandler,
fca6dba8 113 YoutubeDLRedirectHandler,
ce02ed60 114)
a0e07d31 115from .cache import Cache
52a8a1e1 116from .extractor import (
117 gen_extractor_classes,
118 get_info_extractor,
119 _LAZY_LOADER,
120 _PLUGIN_CLASSES
121)
4c54b89e 122from .extractor.openload import PhantomJSwrapper
52a8a1e1 123from .downloader import (
124 get_suitable_downloader,
125 shorten_protocol_name
126)
4c83c967 127from .downloader.rtmp import rtmpdump_version
4f026faf 128from .postprocessor import (
f17f8651 129 FFmpegFixupM3u8PP,
62cd676c 130 FFmpegFixupM4aPP,
6271f1ca 131 FFmpegFixupStretchedPP,
4f026faf
PH
132 FFmpegMergerPP,
133 FFmpegPostProcessor,
0202b52a 134 # FFmpegSubtitlesConvertorPP,
4f026faf 135 get_postprocessor,
0202b52a 136 MoveFilesAfterDownloadPP,
4f026faf 137)
dca08720 138from .version import __version__
8222d8de 139
e9c0cdd3
YCH
140if compat_os_name == 'nt':
141 import ctypes
142
2459b6e1 143
8222d8de
JMF
144class YoutubeDL(object):
145 """YoutubeDL class.
146
147 YoutubeDL objects are the ones responsible for downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
153
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object hands it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL processes the extracted information, possibly using a File
160 Downloader to download the video.
161
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
168
169 Available options:
170
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
180940e0 173 videopassword: Password for accessing a video.
1da50aa3
S
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
8222d8de
JMF
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
ad8915b7 180 no_warnings: Do not print out anything for warnings.
53c18592 181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
8694c600 189 forcejson: Force printing info_dict as JSON.
63e0be34
PH
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
c25228e5 192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
8222d8de 194 simulate: Do not download the video files.
eb8a4433 195 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 197 ignore_no_formats_error: Ignore "No video formats" error. Useful for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
c25228e5 200 format_sort: How to sort the video formats. see "Sorting Formats"
201 for more details.
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
203 for more details.
204 allow_multiple_video_streams: Allow multiple video streams to be merged
205 into a single file
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
207 into a single file
4524baf0 208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 210 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
212 A string is also accepted for backward compatibility
a820dc72
RA
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
4524baf0 216 windowsfilenames: Force the filenames to be windows compatible
a820dc72 217 ignoreerrors: Do not stop on download errors
7a5c1cfe 218 (Default True when running yt-dlp,
a820dc72 219 but False when directly accessing YoutubeDL class)
26e2805c 220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
d22dec74 222 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
8222d8de
JMF
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
c14e88f0 228 playlist_items: Specific indices of playlist to download.
ff815fe6 229 playlistreverse: Download playlist items in reverse order.
75822ca7 230 playlistrandom: Download playlist items in random order.
8222d8de
JMF
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
8bf9319e 233 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 237 clean_infojson: Remove private fields from the infojson
06167fbb 238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
1fb07d10 240 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 241 writethumbnail: Write the thumbnail image to a file
c25228e5 242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
ec82d85a 244 write_all_thumbnails: Write all thumbnail formats to files
732044af 245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 250 writesubtitles: Write the video subtitles to a file
741dd8ea 251 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 252 allsubtitles: Deprecated - Use subtitlelangs = ['all']
253 Downloads all the subtitles of the video
0b7f3118 254 (requires writesubtitles or writeautomaticsub)
8222d8de 255 listsubtitles: Lists all available subtitles for the video
a504ced0 256 subtitlesformat: The format code for subtitles
c32b0aab 257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
c35f9e72 264 cachedir: Location of the cache files in the filesystem.
a0e07d31 265 False to disable filesystem cache.
47192f92 266 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
275 downloaded.
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
279 Videos already present in the file are not downloaded
280 again.
8a51f564 281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 286 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
a1ee09e8 289 proxy: URL of the proxy server to use
38cce791 290 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 291 on geo-restricted sites.
e344693b 292 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
293 bidi_workaround: Work around buggy terminals without bidirectional text
294 support, using fribidi
a0ddb8a2 295 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 296 include_ads: Download ads as well
04b4d394
PH
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
62fec3b2 299 encoding: Use this encoding instead of the system-specified.
e8ee972c 300 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
301 Pass in 'in_playlist' to only show this behavior for
302 playlist items.
4f026faf 303 postprocessors: A list of dictionaries, each with an entry
71b640cc 304 * key: The name of the postprocessor. See
7a5c1cfe 305 yt_dlp/postprocessor/__init__.py for a list.
56d868db 306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
ab8e5e51
AM
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
71b640cc
PH
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
5cda4eda 314 * status: One of "downloading", "error", or "finished".
ee69b99a 315 Check this first and ignore unknown values.
71b640cc 316
5cda4eda 317 If status is one of "downloading", or "finished", the
ee69b99a
PH
318 following properties may also be present:
319 * filename: The final filename (always present)
5cda4eda 320 * tmpfilename: The filename we're currently writing to
71b640cc
PH
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
323 * total_bytes_estimate: Guess of the eventual file size,
324 None if unavailable.
325 * elapsed: The number of seconds since download started.
71b640cc
PH
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
328 unknown
5cda4eda
PH
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
71b640cc
PH
333
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
45598f15 336 merge_output_format: Extension to use when merging formats.
6b591b29 337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
6271f1ca
PH
340 fixup: Automatically correct known faults of the file.
341 One of:
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
62cd676c 345 about it, warn otherwise (default)
504f20dd 346 source_address: Client-side IP address to bind to.
6ec6cb4e 347 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 348 yt-dlp servers for debugging. (BROKEN)
1cf376f5 349 sleep_interval_requests: Number of seconds to sleep between requests
350 during extraction
7aa589a5
S
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
355 max_sleep_interval.
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
1cf376f5 361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
364 match_filter: A function that gets called with the info_dict of
365 every video.
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
7e5db8c9 369 no_color: Do not emit color codes in output.
0a840f58 370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 371 HTTP header
0a840f58 372 geo_bypass_country:
773f291d
S
373 Two-letter ISO 3166-1 alpha-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
504f20dd 375 X-Forwarded-For HTTP header
5f95927a
S
376 geo_bypass_ip_block:
377 IP range in CIDR notation that will be used similarly to
504f20dd 378 geo_bypass_country
71b640cc 379
85729c51 380 The following options determine which downloader is picked:
52a8a1e1 381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
53ed7066 390 compat_opts: Compatibility options. See "Differences in default behavior".
18e674b4 391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
53ed7066 394 and no-youtube-unavailable-videos work when used via the API
fe7e0c98 395
8222d8de 396 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 397 the downloader (see yt_dlp/downloader/common.py):
8222d8de 398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 401 http_chunk_size.
76b1bd67
JMF
402
403 The following options are used by the post processors:
d4a24f40 404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 405 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
43820c03 408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to be passed to all PP
e409895f 413
414 The following options are used by the extractors:
62bff2c1 415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 417 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 418 discontinuities such as ad breaks (default: False)
3600fd59 419 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
e409895f 423 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
8222d8de
JMF
427 """
428
c9969434
S
429 _NUMERIC_FIELDS = set((
430 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
431 'timestamp', 'upload_year', 'upload_month', 'upload_day',
432 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
433 'average_rating', 'comment_count', 'age_limit',
434 'start_time', 'end_time',
435 'chapter_number', 'season_number', 'episode_number',
436 'track_number', 'disc_number', 'release_year',
437 'playlist_index',
438 ))
439
8222d8de
JMF
440 params = None
441 _ies = []
56d868db 442 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
0202b52a 443 __prepare_filename_warned = False
1cf376f5 444 _first_webpage_request = True
8222d8de
JMF
445 _download_retcode = None
446 _num_downloads = None
30a074c2 447 _playlist_level = 0
448 _playlist_urls = set()
8222d8de
JMF
449 _screen_file = None
450
def __init__(self, params=None, auto_init=True):
    """Set up a YoutubeDL object from a dictionary of options.

    params is the options dictionary; None is treated as an empty one.
    When auto_init is true, the debug header is printed and the default
    info extractors are added.
    """
    if params is None:
        params = {}

    # Fresh per-instance state (the class-level values are only defaults)
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0

    # The 'logtostderr' value is used as an index: False -> stdout, True -> stderr
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    # Start from the default parameters, then overlay the caller's options
    self.params = {
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
            'Update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        # Warn and return True when the deprecated parameter was supplied
        if self.params.get(param) is None:
            return False
        self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
        return True

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given as well
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    for deprecated_option in (
            ('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"'),
            ('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"'),
            ('useid', '--id', '-o "%(id)s.%(ext)s"')):
        check_deprecated(*deprecated_option)

    for pending_warning in self.params.get('warnings', []):
        self.report_warning(pending_warning)

    final_ext = self.params.get('final_ext')
    if final_ext:
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = final_ext

    # An explicit None for 'overwrites' is treated like the key being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    ascii_only_filesystem = (
        sys.platform != 'win32'
        and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
        and not params.get('restrictfilenames', False))
    if ascii_only_filesystem:
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for archive_line in archive_file:
                    self.archive.add(archive_line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for raw_definition in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(raw_definition['key'])
        # Work on a copy so the caller's dictionary is left untouched
        pp_options = dict(raw_definition)
        pp_options.pop('key')
        when = pp_options.pop('when', 'post_process')
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_options)), when=when)

    for post_hook in self.params.get('post_hooks', []):
        self.add_post_hook(post_hook)

    for progress_hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(progress_hook)

    register_socks_protocols()
585
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    When such arguments are found, the warning shows the argument list
    rewritten with them moved behind a "--" separator.
    """
    # A dash followed by exactly ten ID characters
    suspicious = []
    for position, argument in enumerate(argv):
        if re.match(r'^-[0-9A-Za-z_-]{10}$', argument):
            suspicious.append(position)
    if not suspicious:
        return

    kept = [argument for position, argument in enumerate(argv) if position not in suspicious]
    moved = [argv[position] for position in suspicious]
    correct_argv = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
601
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the end of the list.

    An instance is additionally indexed by its ie_key() and gets this
    object set as its downloader; a class is only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 608
56c73665
JMF
def get_info_extractor(self, ie_key):
    """Return the extractor instance registered under ie_key.

    If no instance is registered yet, one is created from the class that
    the module-level get_info_extractor() returns for ie_key, added to the
    extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
620
023fa8c4
JMF
def add_default_info_extractors(self):
    """Add everything yielded by gen_extractor_classes() to the end of the list."""
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
627
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain selected by `when`.

    `when` must be one of the keys of self._pps; the postprocessor is then
    given this object as its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
632
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks."""
    hooks = self._post_hooks
    hooks.append(ph)
636
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 640
1c088fa8 641 def _bidi_workaround(self, message):
5d681e96 642 if not hasattr(self, '_output_channel'):
1c088fa8
PH
643 return message
644
5d681e96 645 assert hasattr(self, '_output_process')
11b85ce6 646 assert isinstance(message, compat_str)
6febd1c1
PH
647 line_count = message.count('\n') + 1
648 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 649 self._output_process.stdin.flush()
6febd1c1 650 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 651 for _ in range(line_count))
6febd1c1 652 return res[:-len('\n')]
1c088fa8 653
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' parameter."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
734f90bb 656
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout.

    With a 'logger' parameter set, the message goes to logger.debug()
    instead. A quiet message is only written when 'verbose' is set, and
    then to the error file rather than the screen file.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    destination = self._err_file if quiet else self._screen_file
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), line_end), destination)
8222d8de
JMF
665
def to_stderr(self, message):
    """Print message to stderr, or hand it to logger.error() if a logger is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
        return
    logger.error(message)
8222d8de 673
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    On 'nt' the title is set through kernel32 if a console window exists;
    elsewhere an escape sequence is written to the screen file, but only
    when TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 684
bdde425c
PH
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Nothing happens unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
693
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Nothing happens unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
f89197d7 709
dca08720 710 if self.params.get('cookiefile') is not None:
1bab3437 711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 712
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well: ``tb`` when given, otherwise one built
    from the exception being handled or, failing that, the current stack.

    Unless the ``ignoreerrors`` param is set, a DownloadError is then
    raised; otherwise the download return code is set to 1.
    """
    def wrapped_exc_info():
        # exc_info tuple carried by the exception being handled, if that
        # exception has a usable `exc_info` attribute; None otherwise
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                wrapped = wrapped_exc_info()
                tb = ''
                if wrapped is not None:
                    tb += ''.join(traceback.format_exception(*wrapped))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        wrapped = wrapped_exc_info()
        exc_info = wrapped if wrapped is not None else sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
743
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet)
748
8222d8de
JMF
def report_warning(self, message):
    '''
    Emit a warning.

    With a configured logger the message goes to its warning() method.
    Otherwise, unless the 'no_warnings' param is set, it is printed to
    stderr prefixed with 'WARNING:', which is colored when color is
    allowed, stderr is a tty and the OS is not Windows.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
765
def report_error(self, message, tb=None):
    '''
    Forward to trouble() with the message prefixed by 'ERROR:'.

    The prefix is colored red when color is allowed, stderr is a tty
    and the OS is not Windows.
    '''
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
777
def write_debug(self, message):
    '''Emit a '[debug] '-prefixed message, but only in verbose mode.

    The line goes to the configured logger's debug() method when there is
    one, and through self._write_string() otherwise.
    '''
    if not self.params.get('verbose', False):
        return
    debug_line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(debug_line)
        return
    self._write_string('%s\n' % debug_line)
787
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that the file was already fully downloaded.

    Falls back to a message without the file name when showing the name
    raises UnicodeEncodeError.
    """
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 794
def report_file_delete(self, file_name):
    """Tell the user that an existing file is about to be deleted.

    Falls back to a message without the file name when showing the name
    raises UnicodeEncodeError.
    """
    try:
        detailed = 'Deleting existing file %s' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
0c3d0f51 801
def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict of output templates.

    A non-dict value becomes the 'default' template. Every key of
    DEFAULT_OUTTMPL whose template is missing or empty is filled in from
    DEFAULT_OUTTMPL (a dict supplied by the caller is updated in place).
    A warning is reported for each template given as bytes.
    """
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        outtmpl_dict = {'default': outtmpl_dict}
    fallbacks = {
        name: template for name, template in DEFAULT_OUTTMPL.items()
        if not outtmpl_dict.get(name)}
    outtmpl_dict.update(fallbacks)
    for template in outtmpl_dict.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return outtmpl_dict
815
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    outtmpl   -- the output template string
    info_dict -- the fields available for substitution (not modified; a copy is used)
    sanitize  -- optional callable (key, value) -> value applied to non-numeric
                 values; when given, duration_string also uses '-' instead of ':'

    Returns a tuple (outtmpl, template_dict): the patched template and a
    defaultdict that yields the 'outtmpl_na_placeholder' param (default 'NA')
    for missing fields.
    """
    template_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    # duration_string
    # NB: this must run before `sanitize` gets its default below, since the
    # separator depends on whether the caller supplied a sanitizer
    template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)

    # epoch
    template_dict['epoch'] = int(time.time())

    # autonumber
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
        autonumber_size = 5
    template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

    # resolution if not defined
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '%dx?' % template_dict['width']

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
        'autonumber': autonumber_size,
    }
    FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'

    def pad_field(field_mobj):
        # Each occurrence is padded with the width of its OWN field; taking
        # the width from the first match only would pad e.g. playlist_index
        # with autonumber's width when both appear in the template
        field = field_mobj.group('field')
        return '%%(%s)0%dd' % (field, field_size_compat_map[field])

    outtmpl = re.sub(FIELD_SIZE_COMPAT_RE, pad_field, outtmpl)

    numeric_fields = list(self._NUMERIC_FIELDS)
    if sanitize is None:
        sanitize = lambda k, v: v

    EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{0})
        (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(FIELD_RE))
    MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
        final_key = outer_mobj.group('key')
        str_type = outer_mobj.group('type')
        value = None
        mobj = re.match(INTERNAL_FORMAT_RE, final_key)
        if mobj is not None:
            mobj = mobj.groupdict()
            # Object traversal
            fields = mobj['fields'].split('.')
            value = traverse_dict(template_dict, fields)
            # Negative
            if mobj['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            if mobj['maths']:
                value = float_or_none(value)
                math_op = None
                # The split alternates operator, operand, operator, operand, ...
                for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
                    if item == '':
                        value = None
                    if value is None:
                        break
                    if math_op:
                        item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                        offset = float_or_none(item)
                        if offset is None:
                            # Not a number literal: treat the operand as another field
                            offset = float_or_none(traverse_dict(template_dict, item.split('.')))
                        try:
                            value = math_op(value, multiplier * offset)
                        except (TypeError, ZeroDivisionError):
                            value = None
                        math_op = None
                    else:
                        math_op = MATH_FUNCTIONS[item]
            # Datetime formatting
            if mobj['strf_format']:
                value = strftime_or_none(value, mobj['strf_format'])
            # Set default
            if value is None and mobj['default'] is not None:
                value = mobj['default']
        # Sanitize
        if str_type in 'crs' and value is not None:  # string
            value = sanitize('%{}'.format(str_type) % fields[-1], value)
        else:  # numeric
            numeric_fields.append(final_key)
            value = float_or_none(value)
        if value is not None:
            template_dict[final_key] = value

    # Missing numeric fields used together with integer presentation types
    # in format specification will break the argument substitution since
    # string NA placeholder is returned for missing fields. We will patch
    # output template for missing fields to meet string presentation type.
    for numeric_field in numeric_fields:
        if template_dict.get(numeric_field) is None:
            outtmpl = re.sub(
                FORMAT_RE.format(re.escape(numeric_field)),
                r'%({0})s'.format(numeric_field), outtmpl)

    template_dict = collections.defaultdict(lambda: na, (
        (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
        for k, v in template_dict.items() if v is not None))
    return outtmpl, template_dict
944
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Render the output template of type ``tmpl_type`` for ``info_dict``.

    Returns the resulting filename, or None (after reporting an error)
    when a ValueError is raised while building it.
    """
    def sanitize(key, value):
        return sanitize_filename(
            compat_str(value),
            restricted=self.params.get('restrictfilenames'),
            is_id=(key == 'id' or key.endswith('_id')))

    try:
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choice(ascii_letters) for _ in range(32))
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep))
        outtmpl = outtmpl.replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        expanded = expand_path(outtmpl).replace(sep, '')
        filename = expanded % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, template_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            pieces = filename.rsplit('.')
            stem = pieces[0][:trim_file_name]
            sub_ext = pieces[-2] if len(pieces) > 2 else ''
            filename = '.'.join(filter(None, [stem, sub_ext, pieces[-1]]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
985
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    info_dict -- fields used to render the output template
    dir_type  -- template/path type; '' selects the 'default' template and
                 no type-specific sub-directory
    warn      -- when true, warn (once) if the 'paths' param will be ignored

    Returns the filename unchanged when it is '-' or empty/None; otherwise
    the sanitized path built from the 'home' path, the ``dir_type`` path
    and the filename.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout')
        # _prepare_filename() returns None when the template failed;
        # os.path.isabs(None) would raise TypeError, so guard against it
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
0202b52a 1015
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason for skipping it. The reason is also
    printed unless `silent` is set, and ExistingVideoReached /
    RejectedVideoReached is raised when the corresponding
    'break_on_existing' / 'break_on_reject' param is enabled.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Returns the first reason to reject the entry, or None
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), date_range)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # The user-supplied match_filter is only consulted for complete info
        if incomplete:
            return None
        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        return match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1069
b6c45014
JMF
1070 @staticmethod
1071 def add_extra_info(info_dict, extra_info):
1072 '''Set the keys from extra_info in info dict if they are missing'''
1073 for key, value in extra_info.items():
1074 info_dict.setdefault(key, value)
1075
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for the given URL with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
                  (defaults to an empty dictionary)
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns whatever the extraction step returns for the first suitable
    extractor. Returns None when the video is already recorded in the
    download archive, or when no extractor is suitable (an error is
    reported in that case).
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that the archive can be
        # checked before extraction
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        # Only reached when the loop ran out of extractors without `break`
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1124
def __handle_extraction_exceptions(func):
    # Decorator for extraction methods: turns extraction failures into
    # reported errors. The wrapped call returns None whenever
    # self.report_error() returns instead of raising.
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as err:
            parts = [err.msg]
            if err.countries:
                available_in = ', '.join(
                    ISO3166Utils.short2full(code) for code in err.countries)
                parts.append('\nThis video is available in %s.' % available_in)
            parts.append('\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error(''.join(parts))
        except ExtractorError as err:  # An error we somewhat expected
            self.report_error(compat_str(err), err.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            # Control-flow exceptions must always reach the caller
            raise
        except Exception as err:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1146
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process its result.

    Returns None when the extractor returns None, the processed result
    when `process` is true, and the raw result (with the default extra
    info added) otherwise.
    """
    extracted = ie.extract(url)
    if extracted is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(extracted, list):
        # Backwards compatibility: old IE result format
        extracted = {'_type': 'compat_list', 'entries': extracted}
    self.add_default_extra_info(extracted, ie, url)
    if not process:
        return extracted
    return self.process_ie_result(extracted, download, extra_info)
fe7e0c98 1163
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in extractor and webpage URL fields that ie_result is missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1171
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    extra_info -- dictionary with extra values to add to each result
                  (defaults to an empty dictionary)

    Returns None for a playlist whose webpage URL is already being
    processed. Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info() is ie_key
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Fields of the outer result override the inner one, except for
        # those that identify the inner result itself
        not_forced = ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key')
        force_properties = {
            k: v for k, v in ie_result.items()
            if v is not None and k not in not_forced}
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized so that the id is really used when there is no
            # title ('%' binds tighter than 'or')
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1288
def _ensure_dir_exists(self, path):
    """Delegate to make_dir(), passing self.report_error as its error callback."""
    on_error = self.report_error
    return make_dir(path, on_error)
1291
def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    The entries are chosen according to the 'playlist_items',
    'playliststart' and 'playlistend' params, optionally reversed or
    shuffled, and each one is processed in turn. Playlist-level files
    (info JSON, thumbnails, description) are written when enabled.

    Returns ie_result with 'entries' replaced by the processed results,
    or None when a playlist-level file could not be prepared/written.
    Raises EntryNotInPlaylist when ie_result has no 'entries', or when a
    requested entry is missing from an incomplete entry list.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # max(indexes), not max(*indexes): the latter raises TypeError
            # when exactly one entry was requested
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # Expands e.g. '1-3,7' into 1, 2, 3, 7
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        # Stop collecting early if a break_on_* condition is hit
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist_index' in self.params.get('compat_options', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1463
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry through process_ie_result()."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1468
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """Build a predicate for format dicts from a single filter expression.

    Two syntaxes are tried in order: a numeric comparison on one of a fixed
    set of keys (e.g. ``height>=720``), then a string comparison on any key
    (e.g. ``ext!=webm``). A ``?`` after the operator makes the predicate
    truthy for formats that lack the key.

    Raises ValueError when the spec matches neither syntax or when a numeric
    value cannot be interpreted.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(re.escape(symbol) for symbol in numeric_ops))
    match = numeric_rex.search(filter_spec)

    if match:
        raw_value = match.group('value')
        try:
            wanted = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and without
            # an explicit trailing "B"
            wanted = parse_filesize(raw_value)
            if wanted is None:
                wanted = parse_filesize(raw_value + 'B')
            if wanted is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = numeric_ops[match.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(re.escape(symbol) for symbol in string_ops))
        match = string_rex.search(filter_spec)
        if not match:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        wanted = match.group('value')
        plain_compare = string_ops[match.group('op')]
        if match.group('negation'):
            def compare(attr, value):
                return not plain_compare(attr, value)
        else:
            compare = plain_compare

    field = match.group('key')
    # The raw "?" capture (or None); returned as-is for formats missing the key
    when_missing = match.group('none_inclusive')

    def _filter(f):
        actual = f.get(field)
        if actual is None:
            return when_missing
        return compare(actual, wanted)

    return _filter
1531
def _default_format_spec(self, info_dict, download=True):
    """Return the format selector string used when no format was requested.

    A single pre-merged file ('best/...') is preferred when an actual
    download is about to happen and merging is not possible or not sensible
    (merger unusable, live stream, or output to stdout). Otherwise separate
    video and audio are preferred, using the older non-starred spec when
    multiple audio streams are allowed or the 'format-spec' compat option
    is set.
    """
    def merger_usable():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    prefer_best = False
    if not self.params.get('simulate', False) and download:
        # Only probe the merger when a real download is going to happen
        prefer_best = (
            not merger_usable()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'

    legacy_spec = (
        self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params.get('compat_opts', []))
    if legacy_spec:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
0017d9ad 1554
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    format_spec is tokenized, parsed into a tree of FormatSelector nodes
    (single atoms, '/' alternatives, '+' merges, '(...)' groups and
    ','-separated lists, each optionally followed by '[...]' filters) and
    turned into a function. The returned function takes a context dict with
    the keys 'formats' and 'incomplete_formats' and returns an iterable of
    the selected format dicts.

    Raises SyntaxError for malformed specifications; invalid '[...]' filters
    raise ValueError from _build_format_filter.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect the raw text of everything up to the closing ']'
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # The nested loop below must continue consuming the same stream
        tokens = iter(tokens)
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                    last_line = line
                else:
                    last_string += string
                    # Keep the end position in step with the joined text
                    last_end = end
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first format providing each restricted stream kind.
            # A new list is built instead of popping from the list being
            # iterated, which skipped elements and could pop twice per format.
            taken = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                restricted_kinds = [
                    aud_vid for aud_vid in ['audio', 'video']
                    if not allow_multiple_streams[aud_vid]
                    and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                if any(taken[aud_vid] for aud_vid in restricted_kinds):
                    # Would add a second stream of a kind that must be unique;
                    # a dropped format must not claim any stream kind
                    continue
                for aud_vid in restricted_kinds:
                    taken[aud_vid] = True
                kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Yield only the formats for which a test download succeeds
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            paths = self.params.get('paths', {})
            temp_file = os.path.join(
                expand_path(paths.get('home', '').strip()),
                expand_path(paths.get('temp', '').strip()),
                'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
            try:
                dl, _ = self.dl(temp_file, f, test=True)
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
                dl = False
            finally:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            if dl:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for picked in f(ctx):
                        yield picked
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if check_formats:
                        formats = _check_formats(formats)
                    for f in formats:
                        yield f
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    # Only test-download when the user asked for it, like the
                    # other selectors do
                    if check_formats:
                        formats = list(_check_formats(formats))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else None)  # b*, w*
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    if format_reverse:
                        matches = matches[::-1]
                    if check_formats:
                        matches = list(itertools.islice(_check_formats(matches), format_idx))
                    n = len(matches)
                    if -n <= format_idx - 1 < n:
                        yield matches[format_idx - 1]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filters narrow a private copy so sibling selectors still see
            # the full format list
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over the token list that can step back one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1883
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the given info dict.

    Starts from a copy of std_headers, overlays the dict's own
    'http_headers', adds a 'Cookie' header when _calc_cookies yields one and
    finally an 'X-Forwarded-For' header from '__x_forwarded_for_ip' unless
    such a header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    if 'X-Forwarded-For' in headers:
        return headers

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers
1901
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar adds for info_dict['url'].

    A throwaway request object is built only so the jar can attach its
    cookies to it; the resulting header value (if any) is returned.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1906
@staticmethod
def _sanitize_thumbnails(info_dict):
    """Normalize the 'thumbnails' list of info_dict in place.

    When 'thumbnails' is absent but 'thumbnail' is set, a one-element list
    is created from it. The list is then sorted by preference, width,
    height, id and url, every url is sanitized, 'resolution' is filled in
    where both dimensions are known and missing ids are set to the list
    index.
    """
    def or_default(value, default):
        return value if value is not None else default

    def sort_key(thumb):
        return (
            or_default(thumb.get('preference'), -1),
            or_default(thumb.get('width'), -1),
            or_default(thumb.get('height'), -1),
            or_default(thumb.get('id'), ''),
            thumb.get('url'))

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single = info_dict.get('thumbnail')
        if single:
            thumbnails = [{'url': single}]
            info_dict['thumbnails'] = thumbnails

    if not thumbnails:
        return

    thumbnails.sort(key=sort_key)
    for index, thumb in enumerate(thumbnails):
        thumb['url'] = sanitize_url(thumb['url'])
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
1927
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video result, select formats and process them.

    The info dict is normalized in place (ids, numeric fields, thumbnails,
    dates, subtitles, formats), the requested formats are chosen with the
    format selector and, when download is true, each selected format is
    passed to process_info.

    Returns the (updated) info dict, or None when one of the list_thumbnails,
    listsubtitles or listformats options short-circuits processing.
    Raises ExtractorError when mandatory fields or usable formats are missing
    (unless 'ignore_no_formats_error' is set).
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is not None and not isinstance(value, compat_str):
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is not None and not isinstance(value, compat_numeric_types):
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    main_thumbnail = info_dict.get('thumbnail')
    all_thumbnails = info_dict.get('thumbnails')
    if main_thumbnail:
        info_dict['thumbnail'] = sanitize_url(main_thumbnail)
    elif all_thumbnails:
        # The list was sorted above; its last entry is used as the default
        info_dict['thumbnail'] = all_thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    for ts_key, date_key in (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    ):
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            derived_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = derived_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        number_key = '%s_number' % field
        if info_dict.get(number_key) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict[number_key])

    for cc_kind in ('subtitles', 'automatic_captions'):
        captions = info_dict.get(cc_kind)
        if not captions:
            continue
        for sub_formats in captions.values():
            for sub_format in sub_formats:
                if sub_format.get('url'):
                    sub_format['url'] = sanitize_url(sub_format['url'])
                if sub_format.get('ext') is None:
                    sub_format['ext'] = determine_ext(sub_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('No video formats found!')
        else:
            raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats if is_wellformed(f)]

    formats_by_id = {}

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(index)
        formats_by_id.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_by_id.items():
        if len(ambiguous_formats) > 1:
            for index, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, index)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    info_dict.pop('__x_forwarded_for_ip', None)

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('Requested format is not available')
        else:
            raise ExtractorError('Requested format is not available', expected=True)
    elif download:
        self.to_screen(
            '[info] %s: Downloading %d format(s): %s' % (
                info_dict['id'], len(formats_to_download),
                ", ".join([f['format_id'] for f in formats_to_download])))
        for selected in formats_to_download:
            new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            new_info['__original_infodict'] = info_dict
            new_info.update(selected)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2166
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language to a list of
    subtitle format dicts (each with an 'ext' key); automatic captions only
    fill in languages that have no normal subtitles.

    Returns a dict mapping each selected language to the chosen format dict,
    in a deterministic order, or None when neither 'writesubtitles' nor
    'writeautomaticsub' is set or nothing is available.
    """
    params = self.params
    want_normal = params.get('writesubtitles')
    want_automatic = params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_automatic:
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if (not want_normal and not want_automatic) or not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif params.get('subtitleslangs', False):
        # An ordered, duplicate-free list keeps the result (and the debug
        # message) independent of string hashing
        requested_langs = []

        def request(langs):
            for candidate in langs:
                if candidate not in requested_langs:
                    requested_langs.append(candidate)

        for lang_re in params.get('subtitleslangs'):
            if lang_re == 'all':
                request(all_sub_langs)
                continue
            # startswith() also copes with an empty entry, which lang_re[0]
            # would crash on
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            matches_lang = re.compile(lang_re + '$').match
            current_langs = [
                candidate for candidate in all_sub_langs if matches_lang(candidate)]
            if discard:
                requested_langs = [
                    kept for kept in requested_langs if kept not in current_langs]
            else:
                request(current_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list has nothing to pick from either; skip it instead of
        # failing on formats[-1]
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [candidate for candidate in formats if candidate['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2229
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the 'force*' options to stdout.

    Works on a copy of info_dict extended with 'filename' (when given) and a
    'urls' field built from the requested formats or the single url. Output
    order: 'forceprint' templates, title, id, url, thumbnail, description,
    filename, duration, format and finally the JSON dump. With incomplete
    set, mandatory fields that are missing are silently skipped.
    """
    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        if actual_field is None:
            actual_field = field
        if not is_forced(field):
            return
        if incomplete and info_dict.get(actual_field) is None:
            return
        self.to_stdout(info_dict[actual_field])

    def print_optional(field):
        if is_forced(field) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            f['url'] + f.get('play_path', '') for f in requested_formats)
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    for tmpl in self.params.get('forceprint', []):
        if re.match(r'\w+$', tmpl):
            # A bare field name is shorthand for the matching template
            tmpl = '%({})s'.format(tmpl)
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
        self.to_stdout(tmpl % info_copy)

    for field, actual_field in (('title', None), ('id', None), ('url', 'urls')):
        print_mandatory(field, actual_field)
    for field in ('thumbnail', 'description', 'filename'):
        print_optional(field)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson', False):
        self.post_extract(info_dict)
        self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2271
def dl(self, name, info, subtitle=False, test=False):
    """Run the downloader selected for ``info`` and return its result.

    With ``test`` set, a fixed parameter set is used instead of
    ``self.params`` and no progress hooks are attached. The downloader
    receives a copy of ``info`` with ``http_headers`` filled in if unset.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params)
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

    # Prefer the URLs of the requested formats; fall back to the single URL
    format_urls = [fmt['url'] for fmt in info.get('requested_formats', [])]
    if not format_urls:
        format_urls = [info['url']]
    self.write_debug('Invoking downloader on "%s"' % '", "'.join(format_urls))

    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2299
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces ``max_downloads``, applies the match filters,
    prepares the output filenames, performs the forced printings, writes
    the requested side files (description, annotations, subtitles, info
    JSON, thumbnails, internet shortcuts), runs the ``before_dl``
    postprocessors, downloads (and schedules merging of) the media, queues
    fixup postprocessors, runs post-processing and the post hooks, and
    finally records the video in the download archive.

    Returns None in every case. Failures are reported through
    ``report_error`` / ``report_warning`` followed by an early return.
    Raises MaxDownloadsReached when the download limit is hit and
    UnavailableVideoError when the download fails with an OS-level error.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only if writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Only skip an existing file when overwriting is disabled, in line
        # with the description/annotation/subtitle/infojson checks above.
        # (The condition used to lack the `not`, inverting the option.)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                """Return an already-downloaded file among ``filepaths``, or None.

                When nothing exists, or overwriting is requested, any files
                found are deleted and None is returned so a download happens.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    if (info_dict['ext'] == 'webm'
                            and self.params.get('writethumbnail', False)
                            and info_dict.get('thumbnails')):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                def correct_ext(filename):
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == old_ext
                        else filename)
                    return '%s.%s' % (filename_wo_ext, info_dict['ext'])

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                _protocols = set(determine_protocol(f) for f in requested_formats)
                if len(_protocols) == 1:
                    info_dict['protocol'] = _protocols.pop()
                directly_mergable = (
                    'no-direct-merge' not in self.params.get('compat_opts', [])
                    and info_dict.get('protocol') is not None  # All requested formats have same protocol
                    and not self.params.get('allow_unplayable_formats')
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
                if directly_mergable:
                    info_dict['url'] = requested_formats[0]['url']
                    # Treat it as a single download
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None:
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if dl_filename is None:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info, 'temp'),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            for file in downloaded:
                                files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'
                    and info_dict.get('ext') == 'm4a'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if ('protocol' in info_dict
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2728
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.outtmpl_dict['default']
    several_urls = len(url_list) > 1
    # A fixed (non-templated) output name cannot hold more than one download
    if (several_urls
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            if not self.params.get('dump_single_json', False):
                continue
            self.post_extract(res)
            self.to_stdout(json.dumps(res, default=repr))

    return self._download_retcode
2760
def download_with_info_file(self, info_filename):
    """Download using a previously written info JSON file.

    If processing the stored info fails with DownloadError or
    EntryNotInPlaylist, the download is retried from the stored
    ``webpage_url``; without one, the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(
            json.loads('\n'.join(lines)), self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2777
cb202fd2 2778 @staticmethod
75d43ca0 2779 def filter_requested_info(info_dict, actually_filter=True):
ae8f99e6 2780 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2781 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2782 if actually_filter:
2783 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries')
2784 empty_values = (None, {}, [], set(), tuple())
2785 reject = lambda k, v: k not in keep_keys and (
2786 k.startswith('_') or k in remove_keys or v in empty_values)
2787 else:
394dcd44 2788 info_dict['epoch'] = int(time.time())
ae8f99e6 2789 reject = lambda k, v: k in remove_keys
5226731e 2790 filter_fn = lambda obj: (
ae8f99e6 2791 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
a515a78d 2792 else obj if not isinstance(obj, dict)
ae8f99e6 2793 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2794 return filter_fn(info_dict)
cb202fd2 2795
def run_pp(self, pp, infodict):
    """Run one postprocessor and deal with the files it asks to delete.

    With the ``keepvideo`` param set, those files are registered in
    ``__files_to_move`` (mapped to ``''``) instead of being removed.
    Returns the info dict produced by ``pp.run``.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for old_filename in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved afterwards
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict
5bfa4862 2817
277d6ff5 2818 @staticmethod
2819 def post_extract(info_dict):
2820 def actual_post_extract(info_dict):
2821 if info_dict.get('_type') in ('playlist', 'multi_video'):
2822 for video_dict in info_dict.get('entries', {}):
b050d210 2823 actual_post_extract(video_dict or {})
277d6ff5 2824 return
2825
07cce701 2826 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2827 extra = post_extractor().items()
2828 info_dict.update(extra)
07cce701 2829 info_dict.pop('__post_extractor', None)
277d6ff5 2830
4ec82a72 2831 original_infodict = info_dict.get('__original_infodict') or {}
2832 original_infodict.update(extra)
2833 original_infodict.pop('__post_extractor', None)
2834
b050d210 2835 actual_post_extract(info_dict or {})
277d6ff5 2836
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under ``key`` on a copy of ``ie_info``.

    Returns a ``(info, files_to_move)`` tuple; the ``__files_to_move``
    bookkeeping key is removed from the returned info dict.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for postprocessor in self._pps[key]:
        info = self.run_pp(postprocessor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
5bfa4862 2843
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # Per-video postprocessors first, then the globally registered ones
    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for postprocessor in before_move:
        info = self.run_pp(postprocessor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']

    for postprocessor in self._pps['after_move']:
        info = self.run_pp(postprocessor, info)
    return info
c1c9a79c 2857
5db07df6 2858 def _make_archive_id(self, info_dict):
e9fef7ee
S
2859 video_id = info_dict.get('id')
2860 if not video_id:
2861 return
5db07df6
PH
2862 # Future-proof against any change in case
2863 # and backwards compatibility with prior versions
e9fef7ee 2864 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2865 if extractor is None:
1211bb6d
S
2866 url = str_or_none(info_dict.get('url'))
2867 if not url:
2868 return
e9fef7ee
S
2869 # Try to find matching extractor for the URL and take its ie_key
2870 for ie in self._ies:
1211bb6d 2871 if ie.suitable(url):
e9fef7ee
S
2872 extractor = ie.ie_key()
2873 break
2874 else:
2875 return
d0757229 2876 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2877
def in_download_archive(self, info_dict):
    """Return True if the video's archive id is in ``self.archive``.

    Always False when no ``download_archive`` param is set or when no
    archive id can be built for ``info_dict``.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information
c1c9a79c
PH
2888
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no ``download_archive`` param is set. The id is also
    added to the in-memory ``self.archive``.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2898
8c51aa65 2899 @staticmethod
8abeeb94 2900 def format_resolution(format, default='unknown'):
fb04e403
PH
2901 if format.get('vcodec') == 'none':
2902 return 'audio only'
f49d89ee
PH
2903 if format.get('resolution') is not None:
2904 return format['resolution']
35615307
DA
2905 if format.get('width') and format.get('height'):
2906 res = '%dx%d' % (format['width'], format['height'])
2907 elif format.get('height'):
2908 res = '%sp' % format['height']
2909 elif format.get('width'):
388ae76b 2910 res = '%dx?' % format['width']
8c51aa65 2911 else:
8abeeb94 2912 res = default
8c51aa65
JMF
2913 return res
2914
c57f7757
PH
2915 def _format_note(self, fdict):
2916 res = ''
2917 if fdict.get('ext') in ['f4f', 'f4m']:
2918 res += '(unsupported) '
32f90364
PH
2919 if fdict.get('language'):
2920 if res:
2921 res += ' '
9016d76f 2922 res += '[%s] ' % fdict['language']
c57f7757
PH
2923 if fdict.get('format_note') is not None:
2924 res += fdict['format_note'] + ' '
2925 if fdict.get('tbr') is not None:
2926 res += '%4dk ' % fdict['tbr']
2927 if fdict.get('container') is not None:
2928 if res:
2929 res += ', '
2930 res += '%s container' % fdict['container']
3089bc74
S
2931 if (fdict.get('vcodec') is not None
2932 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2933 if res:
2934 res += ', '
2935 res += fdict['vcodec']
91c7271a 2936 if fdict.get('vbr') is not None:
c57f7757
PH
2937 res += '@'
2938 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2939 res += 'video@'
2940 if fdict.get('vbr') is not None:
2941 res += '%4dk' % fdict['vbr']
fbb21cf5 2942 if fdict.get('fps') is not None:
5d583bdf
S
2943 if res:
2944 res += ', '
2945 res += '%sfps' % fdict['fps']
c57f7757
PH
2946 if fdict.get('acodec') is not None:
2947 if res:
2948 res += ', '
2949 if fdict['acodec'] == 'none':
2950 res += 'video only'
2951 else:
2952 res += '%-5s' % fdict['acodec']
2953 elif fdict.get('abr') is not None:
2954 if res:
2955 res += ', '
2956 res += 'audio'
2957 if fdict.get('abr') is not None:
2958 res += '@%3dk' % fdict['abr']
2959 if fdict.get('asr') is not None:
2960 res += ' (%5dHz)' % fdict['asr']
2961 if fdict.get('filesize') is not None:
2962 if res:
2963 res += ', '
2964 res += format_bytes(fdict['filesize'])
9732d77e
PH
2965 elif fdict.get('filesize_approx') is not None:
2966 if res:
2967 res += ', '
2968 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2969 return res
91c7271a 2970
def _format_note_table(self, f):
    """Build the NOTE column for a format in the tabular format list.

    The non-empty pieces are joined with ``', '``.
    """
    pieces = (
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    )
    return ', '.join(piece for piece in pieces if piece != '')
2981
def list_formats(self, info_dict):
    """Print the table of available formats for ``info_dict``.

    The tabular layout is used unless the ``list-formats`` compat option
    is set or ``list_formats_as_table`` is False. Formats whose
    ``preference`` is below -1000 are not listed.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('list_formats_as_table', True) is not False)

    listed_formats = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]
            for f in listed_formats]
    else:
        header_line = ['format code', 'extension', 'resolution', 'note']
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in listed_formats]

    rendered = render_table(
        header_line,
        table,
        delim=new_format,
        extraGap=(0 if new_format else 1),
        hideEmpty=new_format)
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], rendered))
cfb56d1a
PH
3027
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of info_dict's thumbnails.

    When info_dict has no thumbnails, a single notice is printed instead.
    Missing 'width'/'height' values are shown as 'unknown'.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs:
        self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])
        rows = []
        for thumb in thumbs:
            rows.append([
                thumb['id'],
                thumb.get('width', 'unknown'),
                thumb.get('height', 'unknown'),
                thumb['url'],
            ])
        self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
    else:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
dca08720 3039
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitle languages and formats.

    video_id  -- identifier used in the printed messages
    subtitles -- mapping of language -> list of format dicts; each format
                 dict has an 'ext' and optionally a 'name'
    name      -- label for the kind of subtitles being listed

    When `subtitles` is empty, a single "has no ..." line is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Formats are shown in the reverse of the order they are given in.
        # The two columns are built separately (rather than by unpacking
        # zip(*...)) so that a language with an empty format list yields
        # an empty row instead of raising ValueError.
        listed = list(reversed(formats))
        exts = [f['ext'] for f in listed]
        names = [f.get('name', 'unknown') for f in listed]
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all
            # when it is just the 'unknown' placeholder
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_screen(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))
a504ced0 3057
dca08720
PH
def urlopen(self, req):
    """Open `req` with this instance's opener and return the response.

    `req` may be a URL string, which is first turned into a request object
    via sanitized_Request, or an already built request object. The socket
    timeout configured on this instance is applied.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3063
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    Reports, in order: the encodings in use, the yt-dlp version and how it
    is being run, lazy-loading / plugin / compatibility-option state, the
    git HEAD (best effort), the Python version and platform, the versions
    of the external executables, the proxy map and, when the 'call_home'
    param is set, the public IP address.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Best effort only: any failure while querying git is ignored and
        # the line is simply omitted. (The former sys.exc_clear() call was
        # dropped: that function does not exist on Python 3, so it only
        # ever produced a swallowed AttributeError.)
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables for which a version was found are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # The "latest version" check that used to follow here sat behind an
        # unconditional `return` and could never run; the dead code has
        # been removed.
3153
def _setup_opener(self):
    """Create the URL opener used for requests and store it on the instance.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params and sets self._socket_timeout,
    self.cookiejar and self._opener.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        # Only load the cookie file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        # No explicit option: use the proxies reported by getproxies()
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty string yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3206
def encode(self, s):
    """Return `s` as bytes, encoding text with self.get_encoding().

    Bytes input is returned unchanged. If the text cannot be encoded, the
    UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason = exc.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
3216
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3222
def _write_thumbnails(self, info_dict, filename):  # return the extensions
    """Download thumbnails next to `filename` and return their extensions.

    Nothing is written unless the 'write_all_thumbnails' or
    'writethumbnail' param is set. With 'write_all_thumbnails' every
    thumbnail is processed in list order and each file name carries the
    thumbnail id; otherwise the list is walked from the end and processing
    stops after the first thumbnail that is written or already present.

    A thumbnail whose file already exists is not downloaded again when the
    'overwrites' param is False. Network errors are reported as warnings
    and that thumbnail is skipped. On a successful download the
    thumbnail's dict gets a 'filepath' entry.

    Returns a list with the extension (including the id prefix when several
    thumbnails are written) of every thumbnail written or already present.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails = []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    ret = []
    for t in thumbnails[::1 if write_all else -1]:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '%s.' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
            ret.append(suffix + thumb_ext)
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # Close the HTTP response once it has been copied (or if
                # copying fails) instead of leaving the connection open
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], error_to_compat_str(err)))
        if ret and not write_all:
            break
    return ret