]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[Youtube] Extract more formats for `music.youtube` URLs (#311)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import sys
23import time
67134eab 24import tokenize
8222d8de 25import traceback
75822ca7 26import random
8222d8de 27
961ea474 28from string import ascii_letters
e5813e53 29from zipimport import zipimporter
961ea474 30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
de6000d9 51 OUTTMPL_TYPES,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
732044af 54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 57 DownloadError,
c0384f22 58 encode_compat_str,
ce02ed60 59 encodeFilename,
9b9c5355 60 error_to_compat_str,
498f5606 61 EntryNotInPlaylist,
8b0d7497 62 ExistingVideoReached,
590bc6f6 63 expand_path,
ce02ed60 64 ExtractorError,
e29663c6 65 float_or_none,
02dbf93f 66 format_bytes,
76d321f6 67 format_field,
143db31d 68 FORMAT_RE,
525ef922 69 formatSeconds,
773f291d 70 GeoRestrictedError,
c9969434 71 int_or_none,
732044af 72 iri_to_uri,
773f291d 73 ISO3166Utils,
ce02ed60 74 locked_file,
0202b52a 75 make_dir,
dca08720 76 make_HTTPS_handler,
ce02ed60 77 MaxDownloadsReached,
3158150c 78 network_exceptions,
cd6fc19e 79 orderedSet,
b7ab0590 80 PagedList,
083c9df9 81 parse_filesize,
91410c9b 82 PerRequestProxyHandler,
dca08720 83 platform_name,
eedb7ba5 84 PostProcessingError,
ce02ed60 85 preferredencoding,
eedb7ba5 86 prepend_extension,
e8e73840 87 random_uuidv4,
51fb4995 88 register_socks_protocols,
cfb56d1a 89 render_table,
eedb7ba5 90 replace_extension,
8b0d7497 91 RejectedVideoReached,
ce02ed60
PH
92 SameFileError,
93 sanitize_filename,
1bb5c511 94 sanitize_path,
dcf77cf1 95 sanitize_url,
67dda517 96 sanitized_Request,
e5660ee6 97 std_headers,
1211bb6d 98 str_or_none,
e29663c6 99 strftime_or_none,
ce02ed60 100 subtitles_filename,
732044af 101 to_high_limit_path,
a439a3a4 102 traverse_dict,
ce02ed60 103 UnavailableVideoError,
29eb5174 104 url_basename,
58b1f00d 105 version_tuple,
ce02ed60
PH
106 write_json_file,
107 write_string,
1bab3437 108 YoutubeDLCookieJar,
6a3f4c3f 109 YoutubeDLCookieProcessor,
dca08720 110 YoutubeDLHandler,
fca6dba8 111 YoutubeDLRedirectHandler,
f5b1bca9 112 process_communicate_or_kill,
ce02ed60 113)
a0e07d31 114from .cache import Cache
52a8a1e1 115from .extractor import (
116 gen_extractor_classes,
117 get_info_extractor,
118 _LAZY_LOADER,
119 _PLUGIN_CLASSES
120)
4c54b89e 121from .extractor.openload import PhantomJSwrapper
52a8a1e1 122from .downloader import (
123 get_suitable_downloader,
124 shorten_protocol_name
125)
4c83c967 126from .downloader.rtmp import rtmpdump_version
4f026faf 127from .postprocessor import (
f17f8651 128 FFmpegFixupM3u8PP,
62cd676c 129 FFmpegFixupM4aPP,
6271f1ca 130 FFmpegFixupStretchedPP,
4f026faf
PH
131 FFmpegMergerPP,
132 FFmpegPostProcessor,
0202b52a 133 # FFmpegSubtitlesConvertorPP,
4f026faf 134 get_postprocessor,
0202b52a 135 MoveFilesAfterDownloadPP,
4f026faf 136)
dca08720 137from .version import __version__
8222d8de 138
e9c0cdd3
YCH
139if compat_os_name == 'nt':
140 import ctypes
141
2459b6e1 142
8222d8de
JMF
143class YoutubeDL(object):
144 """YoutubeDL class.
145
146 YoutubeDL objects are the ones responsible for downloading the
147 actual video file and writing it to disk if the user has requested
148 it, among some other tasks. In most cases there should be one per
149 program. As, given a video URL, the downloader doesn't know how to
150 extract all the needed information, task that InfoExtractors do, it
151 has to pass the URL to one of them.
152
153 For this, YoutubeDL objects have a method that allows
154 InfoExtractors to be registered in a given order. When it is passed
155 a URL, the YoutubeDL object handles it to the first InfoExtractor it
156 finds that reports being able to handle it. The InfoExtractor extracts
157 all the information about the video or videos the URL refers to, and
158 YoutubeDL process the extracted information, possibly using a File
159 Downloader to download the video.
160
161 YoutubeDL objects accept a lot of parameters. In order not to saturate
162 the object constructor with arguments, it receives a dictionary of
163 options instead. These options are available through the params
164 attribute for the InfoExtractors to use. The YoutubeDL also
165 registers itself as the downloader in charge for the InfoExtractors
166 that are added to it, so this is a "mutual registration".
167
168 Available options:
169
170 username: Username for authentication purposes.
171 password: Password for authentication purposes.
180940e0 172 videopassword: Password for accessing a video.
1da50aa3
S
173 ap_mso: Adobe Pass multiple-system operator identifier.
174 ap_username: Multiple-system operator account username.
175 ap_password: Multiple-system operator account password.
8222d8de
JMF
176 usenetrc: Use netrc for authentication instead.
177 verbose: Print additional info to stdout.
178 quiet: Do not print messages to stdout.
ad8915b7 179 no_warnings: Do not print out anything for warnings.
53c18592 180 forceprint: A list of templates to force print
181 forceurl: Force printing final URL. (Deprecated)
182 forcetitle: Force printing title. (Deprecated)
183 forceid: Force printing ID. (Deprecated)
184 forcethumbnail: Force printing thumbnail URL. (Deprecated)
185 forcedescription: Force printing description. (Deprecated)
186 forcefilename: Force printing final filename. (Deprecated)
187 forceduration: Force printing duration. (Deprecated)
8694c600 188 forcejson: Force printing info_dict as JSON.
63e0be34
PH
189 dump_single_json: Force printing the info_dict of the whole playlist
190 (or video) as a single JSON line.
c25228e5 191 force_write_download_archive: Force writing download archive regardless
192 of 'skip_download' or 'simulate'.
8222d8de 193 simulate: Do not download the video files.
eb8a4433 194 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 195 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 196 ignore_no_formats_error: Ignore "No video formats" error. Useful for
197 extracting metadata even if the video is not actually
198 available for download (experimental)
c25228e5 199 format_sort: How to sort the video formats. see "Sorting Formats"
200 for more details.
201 format_sort_force: Force the given format_sort. see "Sorting Formats"
202 for more details.
203 allow_multiple_video_streams: Allow multiple video streams to be merged
204 into a single file
205 allow_multiple_audio_streams: Allow multiple audio streams to be merged
206 into a single file
4524baf0 207 paths: Dictionary of output paths. The allowed keys are 'home'
208 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 209 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 210 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
211 A string is also accepted for backward compatibility
a820dc72
RA
212 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
213 restrictfilenames: Do not allow "&" and spaces in file names
214 trim_file_name: Limit length of filename (extension excluded)
4524baf0 215 windowsfilenames: Force the filenames to be windows compatible
a820dc72 216 ignoreerrors: Do not stop on download errors
7a5c1cfe 217 (Default True when running yt-dlp,
a820dc72 218 but False when directly accessing YoutubeDL class)
26e2805c 219 skip_playlist_after_errors: Number of allowed failures until the rest of
220 the playlist is skipped
d22dec74 221 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 222 overwrites: Overwrite all video and metadata files if True,
223 overwrite only non-video files if None
224 and don't overwrite any file if False
8222d8de
JMF
225 playliststart: Playlist item to start at.
226 playlistend: Playlist item to end at.
c14e88f0 227 playlist_items: Specific indices of playlist to download.
ff815fe6 228 playlistreverse: Download playlist items in reverse order.
75822ca7 229 playlistrandom: Download playlist items in random order.
8222d8de
JMF
230 matchtitle: Download only matching titles.
231 rejecttitle: Reject downloads for matching titles.
8bf9319e 232 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
233 logtostderr: Log messages to stderr instead of stdout.
234 writedescription: Write the video description to a .description file
235 writeinfojson: Write the video description to a .info.json file
75d43ca0 236 clean_infojson: Remove private fields from the infojson
06167fbb 237 writecomments: Extract video comments. This will not be written to disk
238 unless writeinfojson is also given
1fb07d10 239 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 240 writethumbnail: Write the thumbnail image to a file
c25228e5 241 allow_playlist_files: Whether to write playlists' description, infojson etc
242 also to disk when using the 'write*' options
ec82d85a 243 write_all_thumbnails: Write all thumbnail formats to files
732044af 244 writelink: Write an internet shortcut file, depending on the
245 current platform (.url/.webloc/.desktop)
246 writeurllink: Write a Windows internet shortcut file (.url)
247 writewebloclink: Write a macOS internet shortcut file (.webloc)
248 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 249 writesubtitles: Write the video subtitles to a file
741dd8ea 250 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 251 allsubtitles: Deprecated - Use subtitlelangs = ['all']
252 Downloads all the subtitles of the video
0b7f3118 253 (requires writesubtitles or writeautomaticsub)
8222d8de 254 listsubtitles: Lists all available subtitles for the video
a504ced0 255 subtitlesformat: The format code for subtitles
c32b0aab 256 subtitleslangs: List of languages of the subtitles to download (can be regex).
257 The list may contain "all" to refer to all the available
258 subtitles. The language can be prefixed with a "-" to
259 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
260 keepvideo: Keep the video file after post-processing
261 daterange: A DateRange object, download only if the upload_date is in the range.
262 skip_download: Skip the actual download of the video file
c35f9e72 263 cachedir: Location of the cache files in the filesystem.
a0e07d31 264 False to disable filesystem cache.
47192f92 265 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
266 age_limit: An integer representing the user's age in years.
267 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
268 min_views: An integer representing the minimum view count the video
269 must have in order to not be skipped.
270 Videos without view count information are always
271 downloaded. None for no limit.
272 max_views: An integer representing the maximum view count.
273 Videos that are more popular than that are not
274 downloaded.
275 Videos without view count information are always
276 downloaded. None for no limit.
277 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
278 Videos already present in the file are not downloaded
279 again.
8a51f564 280 break_on_existing: Stop the download process after attempting to download a
281 file that is in the archive.
282 break_on_reject: Stop the download process when encountering a video that
283 has been filtered out.
284 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 285 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
286 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
287 At the moment, this is only supported by YouTube.
a1ee09e8 288 proxy: URL of the proxy server to use
38cce791 289 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 290 on geo-restricted sites.
e344693b 291 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
292 bidi_workaround: Work around buggy terminals without bidirectional text
293 support, using fribidi
a0ddb8a2 294 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 295 include_ads: Download ads as well
04b4d394
PH
296 default_search: Prepend this string if an input url is not valid.
297 'auto' for elaborate guessing
62fec3b2 298 encoding: Use this encoding instead of the system-specified.
e8ee972c 299 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
300 Pass in 'in_playlist' to only show this behavior for
301 playlist items.
4f026faf 302 postprocessors: A list of dictionaries, each with an entry
71b640cc 303 * key: The name of the postprocessor. See
7a5c1cfe 304 yt_dlp/postprocessor/__init__.py for a list.
56d868db 305 * when: When to run the postprocessor. Can be one of
306 pre_process|before_dl|post_process|after_move.
307 Assumed to be 'post_process' if not given
ab8e5e51
AM
308 post_hooks: A list of functions that get called as the final step
309 for each video file, after all postprocessors have been
310 called. The filename will be passed as the only argument.
71b640cc
PH
311 progress_hooks: A list of functions that get called on download
312 progress, with a dictionary with the entries
5cda4eda 313 * status: One of "downloading", "error", or "finished".
ee69b99a 314 Check this first and ignore unknown values.
71b640cc 315
5cda4eda 316 If status is one of "downloading", or "finished", the
ee69b99a
PH
317 following properties may also be present:
318 * filename: The final filename (always present)
5cda4eda 319 * tmpfilename: The filename we're currently writing to
71b640cc
PH
320 * downloaded_bytes: Bytes on disk
321 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
322 * total_bytes_estimate: Guess of the eventual file size,
323 None if unavailable.
324 * elapsed: The number of seconds since download started.
71b640cc
PH
325 * eta: The estimated time in seconds, None if unknown
326 * speed: The download speed in bytes/second, None if
327 unknown
5cda4eda
PH
328 * fragment_index: The counter of the currently
329 downloaded video fragment.
330 * fragment_count: The number of fragments (= individual
331 files that will be merged)
71b640cc
PH
332
333 Progress hooks are guaranteed to be called at least once
334 (with status "finished") if the download is successful.
45598f15 335 merge_output_format: Extension to use when merging formats.
6b591b29 336 final_ext: Expected final extension; used to detect when the file was
337 already downloaded and converted. "merge_output_format" is
338 replaced by this extension when given
6271f1ca
PH
339 fixup: Automatically correct known faults of the file.
340 One of:
341 - "never": do nothing
342 - "warn": only emit a warning
343 - "detect_or_warn": check whether we can do anything
62cd676c 344 about it, warn otherwise (default)
504f20dd 345 source_address: Client-side IP address to bind to.
6ec6cb4e 346 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 347 yt-dlp servers for debugging. (BROKEN)
1cf376f5 348 sleep_interval_requests: Number of seconds to sleep between requests
349 during extraction
7aa589a5
S
350 sleep_interval: Number of seconds to sleep before each download when
351 used alone or a lower bound of a range for randomized
352 sleep before each download (minimum possible number
353 of seconds to sleep) when used along with
354 max_sleep_interval.
355 max_sleep_interval:Upper bound of a range for randomized sleep before each
356 download (maximum possible number of seconds to sleep).
357 Must only be used along with sleep_interval.
358 Actual sleep time will be a random float from range
359 [sleep_interval; max_sleep_interval].
1cf376f5 360 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
361 listformats: Print an overview of available video formats and exit.
362 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
363 match_filter: A function that gets called with the info_dict of
364 every video.
365 If it returns a message, the video is ignored.
366 If it returns None, the video is downloaded.
367 match_filter_func in utils.py is one example for this.
7e5db8c9 368 no_color: Do not emit color codes in output.
0a840f58 369 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 370 HTTP header
0a840f58 371 geo_bypass_country:
773f291d
S
372 Two-letter ISO 3166-2 country code that will be used for
373 explicit geographic restriction bypassing via faking
504f20dd 374 X-Forwarded-For HTTP header
5f95927a
S
375 geo_bypass_ip_block:
376 IP range in CIDR notation that will be used similarly to
504f20dd 377 geo_bypass_country
71b640cc 378
85729c51 379 The following options determine which downloader is picked:
52a8a1e1 380 external_downloader: A dictionary of protocol keys and the executable of the
381 external downloader to use for it. The allowed protocols
382 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
383 Set the value to 'native' to use the native downloader
384 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
385 or {'m3u8': 'ffmpeg'} instead.
386 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
387 if True, otherwise use ffmpeg/avconv if False, otherwise
388 use downloader suggested by extractor if None.
53ed7066 389 compat_opts: Compatibility options. See "Differences in default behavior".
d908aa63 390 Note that only format-sort, format-spec, no-live-chat, no-attach-info-json
53ed7066 391 playlist-index, list-formats, no-youtube-channel-redirect
392 and no-youtube-unavailable-videos works when used via the API
fe7e0c98 393
8222d8de 394 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 395 the downloader (see yt_dlp/downloader/common.py):
8222d8de 396 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 397 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 398 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 399 http_chunk_size.
76b1bd67
JMF
400
401 The following options are used by the post processors:
d4a24f40 402 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 403 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
404 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
405 to the binary or its containing directory.
43820c03 406 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
407 and a list of additional command-line arguments for the
408 postprocessor/executable. The dict can also have "PP+EXE" keys
409 which are used when the given exe is used by the given PP.
410 Use 'default' as the name for arguments to be passed to all PP
e409895f 411
412 The following options are used by the extractors:
62bff2c1 413 extractor_retries: Number of times to retry for known errors
414 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 415 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 416 discontinuities such as ad breaks (default: False)
3600fd59 417 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 418 data will be downloaded and processed by extractor.
419 You can reduce network I/O by disabling it if you don't
420 care about DASH. (only for youtube)
e409895f 421 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 422 data will be downloaded and processed by extractor.
423 You can reduce network I/O by disabling it if you don't
424 care about HLS. (only for youtube)
8222d8de
JMF
425 """
426
c9969434
S
427 _NUMERIC_FIELDS = set((
428 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
429 'timestamp', 'upload_year', 'upload_month', 'upload_day',
430 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
431 'average_rating', 'comment_count', 'age_limit',
432 'start_time', 'end_time',
433 'chapter_number', 'season_number', 'episode_number',
434 'track_number', 'disc_number', 'release_year',
435 'playlist_index',
436 ))
437
8222d8de
JMF
438 params = None
439 _ies = []
56d868db 440 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
0202b52a 441 __prepare_filename_warned = False
1cf376f5 442 _first_webpage_request = True
8222d8de
JMF
443 _download_retcode = None
444 _num_downloads = None
30a074c2 445 _playlist_level = 0
446 _playlist_urls = set()
8222d8de
JMF
447 _screen_file = None
448
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    it is merged on top of the built-in defaults and stored in
    self.params. When auto_init is true, the debug header is printed and
    the default info extractors are registered.
    """
    if params is None:
        params = {}
    # Per-instance state; these shadow the class-level placeholders
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Indexing with the flag: falsy -> sys.stdout, truthy -> sys.stderr
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
            'Update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        # Warn and return True when the deprecated parameter was supplied
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Emit any warnings handed in through the 'warnings' parameter
    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    # final_ext overrides merge_output_format
    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the key being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            # The helper process writes to the slave end; its output is
            # read back from the master end (see _bidi_workaround)
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround;
            # any other OS error is propagated
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    """Preload the archive, if any is specified"""
    def preload_download_archive(fn):
        # Read every line of the archive file into self.archive.
        # Returns False when no file name is given or the file does not
        # exist, True once the file has been read.
        if fn is None:
            return False
        self.write_debug('Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    # Must exist before preload_download_archive runs, since it adds to it
    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Instantiate each configured postprocessor; 'key' selects the class,
    # 'when' selects the stage, and the remaining entries become kwargs
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Work on a copy so the caller's dictionary is left untouched
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        if 'when' in pp_def:
            when = pp_def['when']
            del pp_def['when']
        else:
            when = 'post_process'
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
583
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such arguments are a dash followed by exactly ten characters out of
    [0-9A-Za-z_-]. When at least one is found, a warning is reported that
    shows the command line rewritten with those arguments moved behind '--'.
    """
    short_id_pattern = r'^-[0-9A-Za-z_-]{10}$'
    flagged = [pos for pos, arg in enumerate(argv) if re.match(short_id_pattern, arg)]
    if not flagged:
        return

    untouched = [arg for pos, arg in enumerate(argv) if pos not in flagged]
    relocated = [argv[pos] for pos in flagged]
    correct_argv = ['yt-dlp'] + untouched + ['--'] + relocated
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
599
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Classes are only appended. Instances are additionally indexed by their
    ie_key() and told that this object is their downloader.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # Extractor classes are instantiated later, on demand
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 606
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If no instance is known for that key, one is created through the
    module-level get_info_extractor(), registered with add_info_extractor()
    and then returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
618
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes(),
    in the order they are produced
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
625
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain of the stage named by `when`
    and register this object as its downloader."""
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
630
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
634
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks
    (currently only for the file downloader)"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 638
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was set up.

    Without an _output_channel attribute the message is returned unchanged.
    Otherwise the message is written to the helper's stdin as UTF-8 and one
    line per line of the message is read back from the output channel; the
    trailing newline is stripped from the result.
    """
    if hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        expected_lines = 1 + message.count('\n')
        helper_stdin = self._output_process.stdin
        helper_stdin.write((message + '\n').encode('utf-8'))
        helper_stdin.flush()
        pieces = []
        for _ in range(expected_lines):
            pieces.append(self._output_channel.readline().decode('utf-8'))
        return ''.join(pieces)[:-len('\n')]
    return message
1c088fa8 651
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' parameter."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 654
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    When a 'logger' parameter is set, the message goes to logger.debug()
    and nothing is written to the screen, regardless of `quiet`.
    Otherwise, unless `quiet` is true, the message is passed through the
    bidi workaround and written to the screen file.

    skip_eol: when truthy, no newline is appended. Any truthy value is
              accepted; the flag is no longer used as a list index, which
              failed for values other than False/True/0/1.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet:
        return
    terminator = '' if skip_eol else '\n'
    output = self._bidi_workaround(message) + terminator
    self._write_string(output, self._screen_file)
8222d8de
JMF
665
def to_stderr(self, message):
    """Print message to stderr

    When a 'logger' parameter is set, the message goes to logger.error()
    instead. Otherwise it is passed through the bidi workaround and written,
    followed by a newline, to the error file.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    output = self._bidi_workaround(message) + '\n'
    self._write_string(output, self._err_file)
8222d8de 675
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    On 'nt' the title is set through kernel32 if a console window exists;
    elsewhere an escape sequence is written to the screen file, provided
    TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 686
bdde425c
PH
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Nothing happens unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    settings = self.params
    if not settings.get('consoletitle', False) or settings.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
695
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Nothing happens unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    settings = self.params
    if not settings.get('consoletitle', False) or settings.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
704
def __enter__(self):
    """Enter the context: save the console title and hand back this object."""
    context = self
    context.save_console_title()
    return context
708
def __exit__(self, *args):
    """Leave the context: restore the console title and, when a
    'cookiefile' parameter is set, save the cookie jar."""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 714
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem shows up.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well: the one given in tb, or one built from the
    exception currently being handled, or the current call stack.

    Unless the 'ignoreerrors' param is set, a DownloadError carrying the
    message and the relevant exc_info is raised; otherwise the download
    return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                tb = ''
                # Exceptions may carry the exc_info of an underlying error
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*exc_value.exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = sys.exc_info()
    if exc_info[0] and hasattr(exc_info[1], 'exc_info') and exc_info[1].exc_info[0]:
        exc_info = exc_info[1].exc_info
    raise DownloadError(message, exc_info)
744
def to_screen(self, message, skip_eol=False):
    """Print message to stdout, honouring the 'quiet' param"""
    quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet)
749
8222d8de
JMF
def report_warning(self, message):
    '''
    Report a warning. With a 'logger' param set, the message goes to its
    warning() method. Otherwise it is printed to stderr prefixed with
    'WARNING:' (unless 'no_warnings' is set); the prefix is colored when
    stderr is a tty, colors are not disabled and the OS is not 'nt'.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
766
def report_error(self, message, tb=None):
    '''
    Hand the message to trouble() prefixed with 'ERROR:'. The prefix is
    colored in red when stderr is a tty, colors are not disabled and the
    OS is not 'nt'.
    '''
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
778
def write_debug(self, message):
    '''Emit a '[debug] '-prefixed message in verbose mode, via the logger if one is set'''
    if not self.params.get('verbose', False):
        return
    debug_line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(debug_line)
        return
    self._write_string('%s\n' % debug_line)
788
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded.

    Falls back to a message without the file name when printing it fails
    with UnicodeEncodeError.
    """
    try:
        text = '[download] %s has already been downloaded' % file_name
        self.to_screen(text)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 795
def report_file_delete(self, file_name):
    """Tell the user that the existing file file_name is about to be deleted.

    Falls back to a message without the file name when printing it fails
    with UnicodeEncodeError.
    """
    try:
        text = 'Deleting existing file %s' % file_name
        self.to_screen(text)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
0c3d0f51 802
def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict of templates with defaults filled in.

    A non-dict value is treated as the 'default' template. Every key of
    DEFAULT_OUTTMPL that is missing or empty gets its default value; when the
    param already is a dict, that dict is updated in place and returned.
    A warning is reported for each template that is a bytes object.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    fallbacks = {}
    for name, default_tmpl in DEFAULT_OUTTMPL.items():
        if not templates.get(name):
            fallbacks[name] = default_tmpl
    templates.update(fallbacks)

    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
816
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    Arguments:
    outtmpl -- the output template string
    info_dict -- the info dictionary; it is copied, not modified
    sanitize -- optional callable (key, value) -> value applied to string
                values; when given, duration_string uses '-' instead of ':'

    Returns a tuple (outtmpl, template_dict): the adjusted template and a
    defaultdict that yields the 'outtmpl_na_placeholder' param (default 'NA')
    for missing keys.
    """
    template_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    # duration_string
    template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)

    # epoch
    template_dict['epoch'] = int(time.time())

    # autonumber
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
        autonumber_size = 5
    template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

    # resolution if not defined
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '%dx?' % template_dict['width']

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
        'autonumber': autonumber_size,
    }
    FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'

    def _pad_field(field_mobj):
        # Each field must be padded to its own width; using the width of the
        # first matched field for every occurrence mis-pads templates that
        # contain both autonumber and playlist_index
        field = field_mobj.group('field')
        return '%%(%s)0%dd' % (field, field_size_compat_map[field])

    outtmpl = re.sub(FIELD_SIZE_COMPAT_RE, _pad_field, outtmpl)

    numeric_fields = list(self._NUMERIC_FIELDS)
    if sanitize is None:
        sanitize = lambda k, v: v

    EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{0})
        (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(FIELD_RE))
    MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
        final_key = outer_mobj.group('key')
        str_type = outer_mobj.group('type')
        value = None
        mobj = re.match(INTERNAL_FORMAT_RE, final_key)
        if mobj is not None:
            mobj = mobj.groupdict()
            # Object traversal
            fields = mobj['fields'].split('.')
            value = traverse_dict(template_dict, fields)
            # Negative
            if mobj['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            if mobj['maths']:
                value = float_or_none(value)
                # Splitting on the operators yields alternating operator and
                # operand items; math_func holds the pending operator
                math_func = None
                for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
                    if item == '':
                        value = None
                    if value is None:
                        break
                    if math_func:
                        item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                        offset = float_or_none(item)
                        if offset is None:
                            # Operand is not a number: treat it as a field
                            offset = float_or_none(traverse_dict(template_dict, item.split('.')))
                        try:
                            value = math_func(value, multiplier * offset)
                        except (TypeError, ZeroDivisionError):
                            value = None
                        math_func = None
                    else:
                        math_func = MATH_FUNCTIONS[item]
            # Datetime formatting
            if mobj['strf_format']:
                value = strftime_or_none(value, mobj['strf_format'])
            # Set default
            if value is None and mobj['default'] is not None:
                value = mobj['default']
            # Sanitize
            if str_type in 'crs' and value is not None:  # string
                value = sanitize('%{}'.format(str_type) % fields[-1], value)
            else:  # numeric
                numeric_fields.append(final_key)
                value = float_or_none(value)
            if value is not None:
                template_dict[final_key] = value

    # Missing numeric fields used together with integer presentation types
    # in format specification will break the argument substitution since
    # string NA placeholder is returned for missing fields. We will patch
    # output template for missing fields to meet string presentation type.
    for numeric_field in numeric_fields:
        if template_dict.get(numeric_field) is None:
            outtmpl = re.sub(
                FORMAT_RE.format(re.escape(numeric_field)),
                r'%({0})s'.format(numeric_field), outtmpl)

    template_dict = collections.defaultdict(lambda: na, (
        (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
        for k, v in template_dict.items() if v is not None))
    return outtmpl, template_dict
945
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Build the filename for info_dict from the output template tmpl_type.

    Arguments:
    info_dict -- the info dictionary used to fill the template
    tmpl_type -- key into self.outtmpl_dict; unknown keys fall back to 'default'

    Returns the filename, or None (after reporting the error) when the
    template substitution fails with ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, template_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the part before the last two extensions is trimmed.
            # Splitting at most twice from the right keeps dots inside the
            # name itself and does not duplicate a name that has no dot.
            fn_groups = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name]] + fn_groups[1:]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
986
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The name produced by _prepare_filename is joined onto the 'home' path
    and the dir_type specific path taken from the 'paths' param. '-' and
    empty results are returned unchanged. With warn set, a one-time warning
    is issued when 'paths' cannot be applied.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if paths:
            if filename == '-':
                self.report_warning('--paths is ignored when an outputting to stdout')
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True

    if not filename or filename == '-':
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    if dir_type:
        subdir = expand_path(paths.get(dir_type, '').strip())
    else:
        subdir = ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    needs_reencoding = sys.version_info < (3, 0) and sys.platform == 'win32'
    if needs_reencoding:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
0202b52a 1016
442c37b7 1017 def _match_entry(self, info_dict, incomplete):
ecdec191 1018 """ Returns None if the file should be downloaded """
8222d8de 1019
8b0d7497 1020 def check_filter():
1021 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1022 if 'title' in info_dict:
1023 # This can happen when we're just evaluating the playlist
1024 title = info_dict['title']
1025 matchtitle = self.params.get('matchtitle', False)
1026 if matchtitle:
1027 if not re.search(matchtitle, title, re.IGNORECASE):
1028 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1029 rejecttitle = self.params.get('rejecttitle', False)
1030 if rejecttitle:
1031 if re.search(rejecttitle, title, re.IGNORECASE):
1032 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1033 date = info_dict.get('upload_date')
1034 if date is not None:
1035 dateRange = self.params.get('daterange', DateRange())
1036 if date not in dateRange:
1037 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1038 view_count = info_dict.get('view_count')
1039 if view_count is not None:
1040 min_views = self.params.get('min_views')
1041 if min_views is not None and view_count < min_views:
1042 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1043 max_views = self.params.get('max_views')
1044 if max_views is not None and view_count > max_views:
1045 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1046 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1047 return 'Skipping "%s" because it is age restricted' % video_title
1048 if self.in_download_archive(info_dict):
1049 return '%s has already been recorded in archive' % video_title
1050
1051 if not incomplete:
1052 match_filter = self.params.get('match_filter')
1053 if match_filter is not None:
1054 ret = match_filter(info_dict)
1055 if ret is not None:
1056 return ret
1057 return None
1058
1059 reason = check_filter()
1060 if reason is not None:
1061 self.to_screen('[download] ' + reason)
d83cb531 1062 if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
8b0d7497 1063 raise ExistingVideoReached()
d83cb531 1064 elif self.params.get('break_on_reject', False):
8b0d7497 1065 raise RejectedVideoReached()
1066 return reason
fe7e0c98 1067
b6c45014
JMF
1068 @staticmethod
1069 def add_extra_info(info_dict, extra_info):
1070 '''Set the keys from extra_info in info dict if they are missing'''
1071 for key, value in extra_info.items():
1072 info_dict.setdefault(key, value)
1073
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Extract information for url with the first suitable extractor.

    Returns whatever the extraction of the first suitable extractor yields.
    Returns None when the video id is already recorded in the download
    archive; reports an error when no extractor is suitable.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that already archived
        # videos can be skipped without running the extraction
        try:
            if callable(getattr(ie, 'extract_id', None)):
                raw_id = ie.extract_id(url)
            else:
                raw_id = ie._match_id(url)
            temp_id = str_or_none(raw_id)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1122
1123 def __handle_extraction_exceptions(func):
1124 def wrapper(self, *args, **kwargs):
1125 try:
1126 return func(self, *args, **kwargs)
773f291d
S
1127 except GeoRestrictedError as e:
1128 msg = e.msg
1129 if e.countries:
1130 msg += '\nThis video is available in %s.' % ', '.join(
1131 map(ISO3166Utils.short2full, e.countries))
1132 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1133 self.report_error(msg)
fb043a6e 1134 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1135 self.report_error(compat_str(e), e.format_traceback())
8b0d7497 1136 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
d3e5bbf4 1137 raise
8222d8de
JMF
1138 except Exception as e:
1139 if self.params.get('ignoreerrors', False):
9b9c5355 1140 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1141 else:
1142 raise
a0566bbf 1143 return wrapper
1144
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor ie on url and optionally process its result.

    Returns None when the extractor returns None, the processed result when
    process is true, and the raw extractor result otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1161
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage URL fields that ie_result is missing"""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1169
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Arguments:
    ie_result -- the extractor result; its '_type' (default 'video') selects
                 how it is resolved
    download -- whether to download the videos
    extra_info -- dictionary containing the extra values to add to each result

    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesised so that the id is used when there is no title;
            # '%' binds tighter than 'or'
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1270
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for path, passing report_error as the error callback"""
    on_error = self.report_error
    return make_dir(path, on_error)
1273
30a074c2 1274 def __process_playlist(self, ie_result, download):
1275 # We process each entry in the playlist
1276 playlist = ie_result.get('title') or ie_result.get('id')
1277 self.to_screen('[download] Downloading playlist: %s' % playlist)
1278
498f5606 1279 if 'entries' not in ie_result:
1280 raise EntryNotInPlaylist()
1281 incomplete_entries = bool(ie_result.get('requested_entries'))
1282 if incomplete_entries:
1283 def fill_missing_entries(entries, indexes):
1284 ret = [None] * max(*indexes)
1285 for i, entry in zip(indexes, entries):
1286 ret[i - 1] = entry
1287 return ret
1288 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1289
30a074c2 1290 playlist_results = []
1291
1292 playliststart = self.params.get('playliststart', 1) - 1
1293 playlistend = self.params.get('playlistend')
1294 # For backwards compatibility, interpret -1 as whole list
1295 if playlistend == -1:
1296 playlistend = None
1297
1298 playlistitems_str = self.params.get('playlist_items')
1299 playlistitems = None
1300 if playlistitems_str is not None:
1301 def iter_playlistitems(format):
1302 for string_segment in format.split(','):
1303 if '-' in string_segment:
1304 start, end = string_segment.split('-')
1305 for item in range(int(start), int(end) + 1):
1306 yield int(item)
1307 else:
1308 yield int(string_segment)
1309 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1310
1311 ie_entries = ie_result['entries']
1312
1313 def make_playlistitems_entries(list_ie_entries):
1314 num_entries = len(list_ie_entries)
498f5606 1315 for i in playlistitems:
1316 if -num_entries < i <= num_entries:
1317 yield list_ie_entries[i - 1]
1318 elif incomplete_entries:
1319 raise EntryNotInPlaylist()
30a074c2 1320
1321 if isinstance(ie_entries, list):
1322 n_all_entries = len(ie_entries)
1323 if playlistitems:
498f5606 1324 entries = list(make_playlistitems_entries(ie_entries))
30a074c2 1325 else:
1326 entries = ie_entries[playliststart:playlistend]
1327 n_entries = len(entries)
498f5606 1328 msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
30a074c2 1329 elif isinstance(ie_entries, PagedList):
1330 if playlistitems:
1331 entries = []
1332 for item in playlistitems:
1333 entries.extend(ie_entries.getslice(
1334 item - 1, item
1335 ))
1336 else:
1337 entries = ie_entries.getslice(
1338 playliststart, playlistend)
1339 n_entries = len(entries)
498f5606 1340 msg = 'Downloading %d videos' % n_entries
30a074c2 1341 else: # iterable
1342 if playlistitems:
498f5606 1343 entries = list(make_playlistitems_entries(list(itertools.islice(
1344 ie_entries, 0, max(playlistitems)))))
30a074c2 1345 else:
1346 entries = list(itertools.islice(
1347 ie_entries, playliststart, playlistend))
1348 n_entries = len(entries)
498f5606 1349 msg = 'Downloading %d videos' % n_entries
1350
1351 if any((entry is None for entry in entries)):
1352 raise EntryNotInPlaylist()
1353 if not playlistitems and (playliststart or playlistend):
1354 playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1355 ie_result['entries'] = entries
1356 ie_result['requested_entries'] = playlistitems
1357
1358 if self.params.get('allow_playlist_files', True):
1359 ie_copy = {
1360 'playlist': playlist,
1361 'playlist_id': ie_result.get('id'),
1362 'playlist_title': ie_result.get('title'),
1363 'playlist_uploader': ie_result.get('uploader'),
1364 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1365 'playlist_index': 0,
498f5606 1366 }
1367 ie_copy.update(dict(ie_result))
1368
1369 if self.params.get('writeinfojson', False):
1370 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1371 if not self._ensure_dir_exists(encodeFilename(infofn)):
1372 return
1373 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1374 self.to_screen('[info] Playlist metadata is already present')
1375 else:
1376 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1377 try:
1378 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1379 except (OSError, IOError):
1380 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1381
1382 if self.params.get('writedescription', False):
1383 descfn = self.prepare_filename(ie_copy, 'pl_description')
1384 if not self._ensure_dir_exists(encodeFilename(descfn)):
1385 return
1386 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1387 self.to_screen('[info] Playlist description is already present')
1388 elif ie_result.get('description') is None:
1389 self.report_warning('There\'s no playlist description to write.')
1390 else:
1391 try:
1392 self.to_screen('[info] Writing playlist description to: ' + descfn)
1393 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1394 descfile.write(ie_result['description'])
1395 except (OSError, IOError):
1396 self.report_error('Cannot write playlist description file ' + descfn)
1397 return
30a074c2 1398
71729754 1399 # Save playlist_index before re-ordering
1400 entries = [
1401 ((playlistitems[i - 1] if playlistitems else i), entry)
1402 for i, entry in enumerate(entries, 1)]
1403
30a074c2 1404 if self.params.get('playlistreverse', False):
1405 entries = entries[::-1]
30a074c2 1406 if self.params.get('playlistrandom', False):
1407 random.shuffle(entries)
1408
1409 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1410
498f5606 1411 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
26e2805c 1412 failures = 0
1413 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1414 for i, entry_tuple in enumerate(entries, 1):
1415 playlist_index, entry = entry_tuple
53ed7066 1416 if 'playlist_index' in self.params.get('compat_options', []):
1417 playlist_index = playlistitems[i - 1] if playlistitems else i
30a074c2 1418 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1419 # This __x_forwarded_for_ip thing is a bit ugly but requires
1420 # minimal changes
1421 if x_forwarded_for:
1422 entry['__x_forwarded_for_ip'] = x_forwarded_for
1423 extra = {
1424 'n_entries': n_entries,
f59ae581 1425 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1426 'playlist_index': playlist_index,
1427 'playlist_autonumber': i,
30a074c2 1428 'playlist': playlist,
1429 'playlist_id': ie_result.get('id'),
1430 'playlist_title': ie_result.get('title'),
1431 'playlist_uploader': ie_result.get('uploader'),
1432 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1433 'extractor': ie_result['extractor'],
1434 'webpage_url': ie_result['webpage_url'],
1435 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1436 'extractor_key': ie_result['extractor_key'],
1437 }
1438
1439 if self._match_entry(entry, incomplete=True) is not None:
1440 continue
1441
1442 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1443 if not entry_result:
1444 failures += 1
1445 if failures >= max_failures:
1446 self.report_error(
1447 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1448 break
30a074c2 1449 # TODO: skip failed (empty) entries?
1450 playlist_results.append(entry_result)
1451 ie_result['entries'] = playlist_results
1452 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1453 return ie_result
1454
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry under the extraction-exception handling"""
    result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return result
1459
67134eab
JMF
1460 def _build_format_filter(self, filter_spec):
1461 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1462
1463 OPERATORS = {
1464 '<': operator.lt,
1465 '<=': operator.le,
1466 '>': operator.gt,
1467 '>=': operator.ge,
1468 '=': operator.eq,
1469 '!=': operator.ne,
1470 }
67134eab 1471 operator_rex = re.compile(r'''(?x)\s*
a03a3c80 1472 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
083c9df9
PH
1473 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1474 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1475 $
083c9df9 1476 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1477 m = operator_rex.search(filter_spec)
9ddb6925
S
1478 if m:
1479 try:
1480 comparison_value = int(m.group('value'))
1481 except ValueError:
1482 comparison_value = parse_filesize(m.group('value'))
1483 if comparison_value is None:
1484 comparison_value = parse_filesize(m.group('value') + 'B')
1485 if comparison_value is None:
1486 raise ValueError(
1487 'Invalid value %r in format specification %r' % (
67134eab 1488 m.group('value'), filter_spec))
9ddb6925
S
1489 op = OPERATORS[m.group('op')]
1490
083c9df9 1491 if not m:
9ddb6925
S
1492 STR_OPERATORS = {
1493 '=': operator.eq,
10d33b34
YCH
1494 '^=': lambda attr, value: attr.startswith(value),
1495 '$=': lambda attr, value: attr.endswith(value),
1496 '*=': lambda attr, value: value in attr,
9ddb6925 1497 }
67134eab 1498 str_operator_rex = re.compile(r'''(?x)
f96bff99 1499 \s*(?P<key>[a-zA-Z0-9._-]+)
2cc779f4 1500 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1501 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1502 \s*$
9ddb6925 1503 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1504 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1505 if m:
1506 comparison_value = m.group('value')
2cc779f4
S
1507 str_op = STR_OPERATORS[m.group('op')]
1508 if m.group('negation'):
e118a879 1509 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1510 else:
1511 op = str_op
083c9df9 1512
9ddb6925 1513 if not m:
67134eab 1514 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1515
1516 def _filter(f):
1517 actual_value = f.get(m.group('key'))
1518 if actual_value is None:
1519 return m.group('none_inclusive')
1520 return op(actual_value, comparison_value)
67134eab
JMF
1521 return _filter
1522
0017d9ad 1523 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1524
af0f7428
S
1525 def can_merge():
1526 merger = FFmpegMergerPP(self)
1527 return merger.available and merger.can_merge()
1528
91ebc640 1529 prefer_best = (
1530 not self.params.get('simulate', False)
1531 and download
1532 and (
1533 not can_merge()
19807826 1534 or info_dict.get('is_live', False)
de6000d9 1535 or self.outtmpl_dict['default'] == '-'))
53ed7066 1536 compat = (
1537 prefer_best
1538 or self.params.get('allow_multiple_audio_streams', False)
1539 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1540
1541 return (
53ed7066 1542 'best/bestvideo+bestaudio' if prefer_best
1543 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1544 else 'bestvideo+bestaudio/best')
0017d9ad 1545
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized with the Python tokenizer and parsed into
    a tree of FormatSelector tuples supporting these constructs:
        a,b     select every alternative (list)
        a/b     pick the first alternative that yields anything (PICKFIRST)
        a+b     merge formats (MERGE)
        (...)   grouping (GROUP)
        x[...]  filters, compiled by self._build_format_filter

    Returns a function taking a context dict (with the keys 'formats' and
    'incomplete_formats') and returning an iterable of selected format dicts.
    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect everything up to the closing ']' as the raw filter text
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # The nested loop below must continue consuming the same stream
        tokens = iter(tokens)
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first stream of every kind that may not occur
            # more than once. A new list is built rather than popping from
            # the list being iterated: popping in place skipped the element
            # following a removed one and could pop twice for a format
            # carrying both audio and video (IndexError for e.g. "best+best").
            seen = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                kinds = [
                    kind for kind in ('audio', 'video')
                    if fmt_info.get(kind[0] + 'codec') != 'none']
                if any(seen[kind] and not allow_multiple_streams[kind] for kind in kinds):
                    continue
                for kind in kinds:
                    seen[kind] = True
                kept_formats.append(fmt_info)
            # The first format is always kept, so this list is never empty
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Lazily yield only the formats for which a test download succeeds
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            paths = self.params.get('paths', {})
            temp_file = os.path.join(
                expand_path(paths.get('home', '').strip()),
                expand_path(paths.get('temp', '').strip()),
                'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
            try:
                dl, _ = self.dl(temp_file, f, test=True)
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
                dl = False
            finally:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            if dl:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for fmt in f(ctx):
                        yield fmt
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if check_formats:
                        formats = _check_formats(formats)
                    for f in formats:
                        yield f
            elif spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    # Only test-download the formats when explicitly
                    # requested, consistent with the other selectors
                    if check_formats:
                        formats = list(_check_formats(formats))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else None)  # b*, w*
                else:
                    filter_f = ((lambda f: f.get('ext') == spec)
                                if spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    if format_reverse:
                        matches = matches[::-1]
                    if check_formats:
                        # Stop testing as soon as enough working formats are found
                        matches = list(itertools.islice(_check_formats(matches), format_idx))
                    n = len(matches)
                    if -n <= format_idx - 1 < n:
                        yield matches[format_idx - 1]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a copy so that sibling selectors still see the full set
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over the token list that can step back one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1874
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers dict for the given info dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers'],
    sets 'Cookie' from self._calc_cookies when it returns something truthy
    and, if no 'X-Forwarded-For' header is present yet, fills it from the
    private '__x_forwarded_for_ip' field.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1892
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header that self.cookiejar adds for info_dict['url']."""
    probe_request = sanitized_Request(info_dict['url'])
    # The cookie jar writes its header onto the throwaway request
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1897
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a video result, select its formats and optionally download them.

    Normalizes thumbnails, dates, subtitles and formats in info_dict, handles
    the 'list_thumbnails', 'listsubtitles' and 'listformats' params (each of
    which returns None early), runs the format selector and, when download is
    truthy, passes every selected format to self.process_info.

    Returns info_dict, updated with the last selected format.
    Raises ExtractorError when 'id' or 'title' is missing, or when no
    (requested) format is available and 'ignore_no_formats_error' is unset.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is not None and not isinstance(value, compat_str):
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is not None and not isinstance(value, compat_numeric_types):
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # --- Thumbnails ---
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single_thumbnail = info_dict.get('thumbnail')
        if single_thumbnail:
            thumbnails = [{'url': single_thumbnail}]
            info_dict['thumbnails'] = thumbnails
    if thumbnails:
        def value_or(value, default):
            return value if value is not None else default

        def thumbnail_sort_key(t):
            return (
                value_or(t.get('preference'), -1),
                value_or(t.get('width'), -1),
                value_or(t.get('height'), -1),
                value_or(t.get('id'), ''), t.get('url'))

        thumbnails.sort(key=thumbnail_sort_key)
        for idx, thumb in enumerate(thumbnails):
            thumb['url'] = sanitize_url(thumb['url'])
            if thumb.get('width') and thumb.get('height'):
                thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
            if thumb.get('id') is None:
                thumb['id'] = '%d' % idx

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # After sorting, the last thumbnail has the highest sort key
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # --- Dates derived from timestamps ---
    date_fields = (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    )
    for ts_key, date_key in date_fields:
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            as_datetime = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = as_datetime.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        number_key = '%s_number' % field
        if info_dict.get(number_key) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict[number_key])

    # --- Subtitles ---
    for cc_kind in ('subtitles', 'automatic_captions'):
        tracks = info_dict.get(cc_kind)
        if not tracks:
            continue
        for sub_formats in tracks.values():
            for sub_format in sub_formats:
                if sub_format.get('url'):
                    sub_format['url'] = sanitize_url(sub_format['url'])
                if sub_format.get('ext') is None:
                    sub_format['ext'] = determine_ext(sub_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # --- Formats ---
    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('No video formats found!')
        else:
            raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats if is_wellformed(f)]

    formats_by_id = {}

    # We check that all the formats have the format and format_id fields
    for idx, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(idx)
        formats_by_id.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, same_id_formats in formats_by_id.items():
        if len(same_id_formats) > 1:
            for idx, fmt in enumerate(same_id_formats):
                fmt['format_id'] = '%s-%d' % (format_id, idx)

    for fmt in formats:
        if fmt.get('format') is None:
            format_note = fmt.get('format_note')
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(format_note) if format_note is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    info_dict.pop('__x_forwarded_for_ip', None)

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    all_video_only = all(
        f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
    incomplete_formats = all_video_only or all(
        f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('Requested format is not available')
        else:
            raise ExtractorError('Requested format is not available', expected=True)
    elif download:
        selected_ids = [f['format_id'] for f in formats_to_download]
        self.to_screen(
            '[info] %s: Downloading format(s) %s'
            % (info_dict['id'], ", ".join(selected_ids)))
        if len(formats_to_download) > 1:
            self.to_screen(
                '[info] %s: Downloading video in %s formats'
                % (info_dict['id'], len(formats_to_download)))
        for selected in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(selected)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2149
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    video_id is only used in warning messages. normal_subtitles and
    automatic_captions map a language code to a list of subtitle format
    dicts; automatic captions are only used for languages that have no
    normal subtitles.

    Returns a dict mapping each selected language to the chosen format dict,
    in the order the languages were requested, or None when nothing is to be
    written or no subtitles are available.
    """
    params = self.params
    available_subs = {}
    if normal_subtitles and params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # Empty whenever neither 'writesubtitles' nor 'writeautomaticsub' is set
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif params.get('subtitleslangs', False):
        # A list (not a set) keeps the selection order deterministic
        requested_langs = []
        for lang_re in params.get('subtitleslangs'):
            if lang_re == 'all':
                discard, matched_langs = False, all_sub_langs
            else:
                # startswith() also copes with an empty entry
                discard = lang_re.startswith('-')
                if discard:
                    lang_re = lang_re[1:]
                matcher = re.compile(lang_re + '$').match
                matched_langs = [lang for lang in all_sub_langs if matcher(lang)]
            if discard:
                requested_langs = [
                    lang for lang in requested_langs if lang not in matched_langs]
            else:
                for lang in matched_langs:
                    if lang not in requested_langs:
                        requested_langs.append(lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing one
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred extensions is available: use the last format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2212
d06daf23 2213 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2214 def print_mandatory(field, actual_field=None):
2215 if actual_field is None:
2216 actual_field = field
d06daf23 2217 if (self.params.get('force%s' % field, False)
53c18592 2218 and (not incomplete or info_dict.get(actual_field) is not None)):
2219 self.to_stdout(info_dict[actual_field])
d06daf23
S
2220
2221 def print_optional(field):
2222 if (self.params.get('force%s' % field, False)
2223 and info_dict.get(field) is not None):
2224 self.to_stdout(info_dict[field])
2225
53c18592 2226 info_dict = info_dict.copy()
2227 if filename is not None:
2228 info_dict['filename'] = filename
2229 if info_dict.get('requested_formats') is not None:
2230 # For RTMP URLs, also include the playpath
2231 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2232 elif 'url' in info_dict:
2233 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2234
2235 for tmpl in self.params.get('forceprint', []):
2236 if re.match(r'\w+$', tmpl):
2237 tmpl = '%({})s'.format(tmpl)
2238 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2239 self.to_stdout(tmpl % info_copy)
2240
d06daf23
S
2241 print_mandatory('title')
2242 print_mandatory('id')
53c18592 2243 print_mandatory('url', 'urls')
d06daf23
S
2244 print_optional('thumbnail')
2245 print_optional('description')
53c18592 2246 print_optional('filename')
d06daf23
S
2247 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2248 self.to_stdout(formatSeconds(info_dict['duration']))
2249 print_mandatory('format')
53c18592 2250
d06daf23 2251 if self.params.get('forcejson', False):
277d6ff5 2252 self.post_extract(info_dict)
75d43ca0 2253 self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2254
def dl(self, name, info, subtitle=False, test=False):
    """Download info to the file name with a suitable downloader.

    With test=True a fixed parameter set is used instead of self.params
    (its 'test' key is True and output is quiet unless 'verbose' is set),
    and no progress hooks are attached. Returns whatever the downloader's
    download() returns.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_class = get_suitable_downloader(info, params)
    fd = downloader_class(self, params)
    if not test:
        for progress_hook in self._progress_hooks:
            fd.add_progress_hook(progress_hook)
        self.write_debug('Invoking downloader on %r' % info.get('url'))

    # Work on a copy so that the caller's dict does not gain 'http_headers'
    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2281
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Runs the whole per-video pipeline for an ``info_dict`` of type
    ``video``: match filters, pre-processors, forced printings, the
    side files (description, annotations, subtitles, info JSON,
    thumbnails, internet shortcuts), the download itself, fixups,
    post-processors, post hooks and finally the download archive.

    Returns None.  Most failures are reported through
    ``report_error``/``report_warning`` followed by an early return.
    Raises MaxDownloadsReached when the ``max_downloads`` limit is hit
    and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict, incomplete=False) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    info_dict, _ = self.pre_process(info_dict)

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    # Maps a file written next to the temp file to its final destination
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                        # A failed subtitle download is not fatal for the video
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only when writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Keep an existing shortcut only when overwriting is disabled,
        # consistent with the other side files handled above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        # File name without the '.<extension>' suffix
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        # Nothing is downloaded, but the side files still have to be moved
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                """Return an already-downloaded candidate, or None.

                When overwriting is requested the existing candidates
                are deleted and None is returned so a fresh download
                happens.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:
                downloaded = []
                merger = FFmpegMergerPP(self)
                if self.params.get('allow_unplayable_formats'):
                    self.report_warning(
                        'You have requested merging of multiple formats '
                        'while also allowing unplayable formats to be downloaded. '
                        'The formats won\'t be merged to prevent data corruption.')
                elif not merger.available:
                    self.report_warning(
                        'You have requested merging of multiple formats but ffmpeg is not installed. '
                        'The formats won\'t be merged.')

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    if (info_dict['ext'] == 'webm'
                            and self.params.get('writethumbnail', False)
                            and info_dict.get('thumbnails')):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                def correct_ext(filename):
                    # Swap the old extension for the (possibly changed) merge
                    # extension; append it when the old one is not present
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == old_ext
                        else filename)
                    return '%s.%s' % (filename_wo_ext, info_dict['ext'])

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False
                if dl_filename is None:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = prepend_extension(
                            self.prepare_filename(new_info, 'temp'),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not self._ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        # No merge: keep each part where it was downloaded
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'
                    and info_dict.get('ext') == 'm4a'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if ('protocol' in info_dict
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2693
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would all be written to one
    fixed output name.  Returns the accumulated download return code.
    """
    outtmpl = self.outtmpl_dict['default']
    # A template without any '%' field names a single file
    shares_one_file = len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl
    if shares_one_file and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        # Only reached when extraction finished without an exception
        if self.params.get('dump_single_json', False):
            self.post_extract(result)
            self.to_stdout(json.dumps(result, default=repr))

    return self._download_retcode
2725
def download_with_info_file(self, info_filename):
    """Download using a previously written info JSON file.

    If processing the loaded info fails with DownloadError or
    EntryNotInPlaylist and the info carries a ``webpage_url``, the
    download is retried from that URL; otherwise the error propagates.
    Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
    info = self.filter_requested_info(json.loads(raw_json), self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2742
cb202fd2 2743 @staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return a copy of ``info_dict`` without internal bookkeeping keys.

    With ``actually_filter`` false the dict is returned as is (same
    object) after stamping it with the current ``epoch``.  Otherwise keys
    starting with an underscore (except ``_type``) and the listed
    request-specific keys are dropped recursively; lists and tuples both
    come back as lists.
    """
    if not actually_filter:
        info_dict['epoch'] = int(time.time())
        return info_dict

    always_kept = ['_type']
    always_removed = ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries']

    def is_kept(key):
        if key in always_kept:
            return True
        return not (key.startswith('_') or key in always_removed)

    def strip(obj):
        if isinstance(obj, (list, tuple)):
            return [strip(item) for item in obj]
        if isinstance(obj, dict):
            return {key: strip(value) for key, value in obj.items() if is_kept(key)}
        return obj

    return strip(info_dict)
cb202fd2 2758
def run_pp(self, pp, infodict):
    """Run one postprocessor and deal with the files it wants deleted.

    With the ``keepvideo`` option the files are kept and registered in
    ``__files_to_move`` (with an empty destination) unless already listed;
    otherwise they are removed from disk and from ``__files_to_move``.
    Returns the info dict handed back by the postprocessor.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for kept in obsolete_files:
            infodict['__files_to_move'].setdefault(kept, '')
        return infodict

    for old_filename in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # Removed (or at least unwanted) files must not be moved later on
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict
5bfa4862 2780
277d6ff5 2781 @staticmethod
def post_extract(info_dict):
    """Resolve the deferred ``__post_extractor`` fields in place.

    For playlists and multi-videos every entry is resolved recursively.
    For anything else, a truthy ``__post_extractor`` is called and its
    items are merged into the dict; the key is removed afterwards.
    """
    def resolve(node):
        if node.get('_type') not in ('playlist', 'multi_video'):
            if '__post_extractor' in node:
                deferred = node['__post_extractor']
                if deferred:
                    node.update(deferred().items())
                del node['__post_extractor']
            return
        for entry in node.get('entries', {}):
            # Entries may be None; treat those as empty
            resolve(entry or {})

    resolve(info_dict or {})
277d6ff5 2798
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under ``key`` on a copy of ``ie_info``.

    Returns a ``(info, files_to_move)`` tuple; the ``__files_to_move``
    entry is taken out of the returned info dict.
    """
    current = dict(ie_info)
    current['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        current = self.run_pp(processor, current)
    pending_moves = current.pop('__files_to_move', None)
    return current, pending_moves
5bfa4862 2805
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the per-video ``__postprocessors`` followed by the
    ``post_process`` ones, then the file mover, then the ``after_move``
    ones.  Returns the resulting info dict (without ``__files_to_move``).
    """
    info = dict(ie_info)
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })

    def run_each(processors, current):
        for processor in processors:
            current = self.run_pp(processor, current)
        return current

    info = run_each(ie_info.get('__postprocessors', []) + self._pps['post_process'], info)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # Everything has been moved by now, so the bookkeeping entry is dropped
    del info['__files_to_move']
    return run_each(self._pps['after_move'], info)
c1c9a79c 2819
def _make_archive_id(self, info_dict):
    """Return the download-archive key ``'<extractor> <id>'`` or None.

    None is returned when the id is missing, or when no extractor key is
    given and none of the known extractors is suitable for the URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        matching = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching is None:
            return
        extractor = matching.ie_key()
    return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2839
def in_download_archive(self, info_dict):
    """Tell whether the video is already listed in the download archive.

    Always False when no archive file is configured or when no archive id
    can be built from ``info_dict``.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A falsy id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
2850
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and in-memory set.

    Does nothing when no archive file is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2860
8c51aa65 2861 @staticmethod
def format_resolution(format, default='unknown'):
    """Return a short, human-readable resolution for a format dict.

    Precedence: 'audio only' when there is no video codec, then an
    explicit ``resolution``, then whatever can be built from ``width``
    and/or ``height``; ``default`` when none of these is available.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
2876
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-text note shown for a format in the plain listing.

    The note is assembled left to right from the fields present in
    ``fdict``; most parts are preceded by a separator only when something
    has already been written.
    """
    pieces = []

    def emit(text, separator=''):
        # The separator is written only if the note is non-empty so far
        if separator and ''.join(pieces):
            pieces.append(separator)
        pieces.append(text)

    get = fdict.get

    if get('ext') in ['f4f', 'f4m']:
        emit('(unsupported) ')
    if get('language'):
        emit('[%s] ' % fdict['language'], ' ')
    if get('format_note') is not None:
        emit(fdict['format_note'] + ' ')
    if get('tbr') is not None:
        emit('%4dk ' % fdict['tbr'])
    if get('container') is not None:
        emit('%s container' % fdict['container'], ', ')

    vcodec = get('vcodec')
    if vcodec is not None and vcodec != 'none':
        emit(vcodec, ', ')
        if get('vbr') is not None:
            emit('@')
    elif get('vbr') is not None and get('abr') is not None:
        emit('video@')
    if get('vbr') is not None:
        emit('%4dk' % fdict['vbr'])
    if get('fps') is not None:
        emit('%sfps' % fdict['fps'], ', ')

    acodec = get('acodec')
    if acodec is not None:
        emit('video only' if acodec == 'none' else '%-5s' % acodec, ', ')
    elif get('abr') is not None:
        emit('audio', ', ')
    if get('abr') is not None:
        emit('@%3dk' % fdict['abr'])
    if get('asr') is not None:
        emit(' (%5dHz)' % fdict['asr'])

    if get('filesize') is not None:
        emit(format_bytes(fdict['filesize']), ', ')
    elif get('filesize_approx') is not None:
        emit('~' + format_bytes(fdict['filesize_approx']), ', ')
    return ''.join(pieces)
91c7271a 2932
def _format_note_table(self, f):
    """Build the NOTE column for a format in the table listing.

    The non-empty parts are joined with ', '.
    """
    candidates = (
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        # The container is passed with the format's own ext as an ignored value
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    )
    return ', '.join([text for text in candidates if text != ''])
2943
def list_formats(self, info_dict):
    """Print the available formats of a video to the screen.

    The detailed table is used unless ``list-formats`` is among the
    ``compat_opts`` or ``list_formats_as_table`` is False.  Formats with a
    ``preference`` below -1000 are left out of the listing.
    """
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('list_formats_as_table', True) is not False)

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    def detailed_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            self._format_note_table(f)]

    def compact_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f)]

    if new_format:
        make_row = detailed_row
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        make_row = compact_row
        header_line = ['format code', 'extension', 'resolution', 'note']

    # A video without a 'formats' list is treated as its own single format
    formats = info_dict.get('formats', [info_dict])
    table = [make_row(f) for f in formats if is_listed(f)]

    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
            header_line,
            table,
            delim=new_format,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format)))
cfb56d1a
PH
2989
def list_thumbnails(self, info_dict):
    """Print a table of the video's thumbnails, or a note if there are none."""
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])

    def as_row(thumb):
        # Width and height are optional; id and url are looked up unconditionally
        return [thumb['id'], thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]

    rows = [as_row(thumb) for thumb in thumbnails]
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 3001
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitle languages and formats.

    ``subtitles`` maps a language code to a list of format dicts, each
    with an ``ext`` and optionally a ``name``.  ``name`` is only the label
    used in the printed messages (e.g. 'subtitles').
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        if not formats:
            # zip(*...) of nothing cannot be unpacked into two tuples
            return [lang, '', '']
        # Not every format entry carries a 'name'; fall back to a
        # placeholder instead of failing with KeyError
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            # One shared name is shown once; a shared placeholder not at all
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_screen(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))
a504ced0 3019
dca08720
PH
def urlopen(self, req):
    """ Open req (a URL string or a request object) with the configured opener """
    # Plain strings are turned into request objects first
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3025
def print_debug_header(self):
    """Write the '[debug] ...' header lines describing the runtime environment.

    Does nothing unless the 'verbose' param is set. Otherwise reports, in
    order: encodings, program version, lazy-loading/plugin/compat-option
    state, git HEAD (best effort), Python version, external program
    versions and the proxy map. With the 'call_home' param set, the public
    IP address is fetched and printed as well.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may be replaced by an object without an "encoding"
    # attribute; report the type name of that object in this case
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # How the program is being run: frozen executable, zip archive,
    # __main__.py from source, or none of these (empty string)
    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    # Best effort: report the git revision of the directory containing this
    # file; any failure (git missing, not a repository, ...) is ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # sys.exc_clear is not available on every Python version,
            # hence the nested try
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Implementation name, with the PyPy version appended when available
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only programs with a truthy version value are listed, sorted by name
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the proxies of every handler of the opener that declares some
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # NOTE(review): the statements after this return can never run, so
        # the latest-version check is effectively disabled - presumably on
        # purpose; confirm before removing or re-enabling it
        return
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3115
def _setup_opener(self):
    """Create the URL opener used for requests and store it in self._opener.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params. Besides self._opener this also sets
    self._socket_timeout and self.cookiejar.
    """
    socket_timeout = self.params.get('socket_timeout')
    if socket_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(socket_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_url = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        # Only load the cookie file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_url is None:
        # No explicit proxy option: use the proxies reported by urllib
        proxy_map = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxy_map and 'https' not in proxy_map:
            proxy_map['https'] = proxy_map['http']
    elif proxy_url == '':
        # An empty proxy option means: no proxies at all
        proxy_map = {}
    else:
        proxy_map = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = PerRequestProxyHandler(proxy_map)

    traffic_debuglevel = 0
    if self.params.get('debug_printtraffic'):
        traffic_debuglevel = 1
    https_handler = make_HTTPS_handler(self.params, debuglevel=traffic_debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=traffic_debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def _refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = _refuse_file_scheme

    handlers = [
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler]
    url_opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    url_opener.addheaders = []
    self._opener = url_opener
62fec3b2
PH
3168
def encode(self, s):
    """Return s as bytes, encoding text with the encoding from get_encoding().

    Bytes are passed through untouched. A UnicodeEncodeError is re-raised
    with a hint about the encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        target_encoding = None
        try:
            target_encoding = self.get_encoding()
            s = s.encode(target_encoding)
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
3178
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3184
def _write_thumbnails(self, info_dict, filename):  # return the extensions
    """Write the thumbnail(s) of a video next to filename.

    Nothing is written unless the 'write_all_thumbnails' or 'writethumbnail'
    param is set. With 'write_all_thumbnails' every entry of
    info_dict['thumbnails'] is processed in order; otherwise the list is
    walked from its last entry backwards and processing stops after the
    first thumbnail that is written or already present.

    Each processed thumbnail dict gets a 'filepath' key with its target
    path. Download failures covered by network_exceptions are reported as
    warnings and do not abort the loop.

    Returns the list of extensions (including the '<id>.' prefix when
    several thumbnails are written) of the thumbnails that are available.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails = []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    # The thumbnail id is only added to file names when it is needed to
    # tell several files apart
    multiple = write_all and len(thumbnails) > 1

    ret = []
    for t in thumbnails[::1 if write_all else -1]:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '%s.' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
            ret.append(suffix + thumb_ext)
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # closing() releases the HTTP response even when copying
                # fails; it also works for response objects that are not
                # context managers themselves
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
            except network_exceptions as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], error_to_compat_str(err)))
        # A single thumbnail is enough unless all of them were requested
        if ret and not write_all:
            break
    return ret