]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
Add experimental option `--check-formats` to test the URLs before format selection
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import sys
23import time
67134eab 24import tokenize
8222d8de 25import traceback
75822ca7 26import random
8222d8de 27
961ea474 28from string import ascii_letters
e5813e53 29from zipimport import zipimporter
961ea474 30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
de6000d9 51 OUTTMPL_TYPES,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
732044af 54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 57 DownloadError,
c0384f22 58 encode_compat_str,
ce02ed60 59 encodeFilename,
9b9c5355 60 error_to_compat_str,
498f5606 61 EntryNotInPlaylist,
8b0d7497 62 ExistingVideoReached,
590bc6f6 63 expand_path,
ce02ed60 64 ExtractorError,
e29663c6 65 float_or_none,
02dbf93f 66 format_bytes,
76d321f6 67 format_field,
143db31d 68 FORMAT_RE,
525ef922 69 formatSeconds,
773f291d 70 GeoRestrictedError,
c9969434 71 int_or_none,
732044af 72 iri_to_uri,
773f291d 73 ISO3166Utils,
ce02ed60 74 locked_file,
0202b52a 75 make_dir,
dca08720 76 make_HTTPS_handler,
ce02ed60 77 MaxDownloadsReached,
3158150c 78 network_exceptions,
cd6fc19e 79 orderedSet,
b7ab0590 80 PagedList,
083c9df9 81 parse_filesize,
91410c9b 82 PerRequestProxyHandler,
dca08720 83 platform_name,
eedb7ba5 84 PostProcessingError,
ce02ed60 85 preferredencoding,
eedb7ba5 86 prepend_extension,
e8e73840 87 random_uuidv4,
51fb4995 88 register_socks_protocols,
cfb56d1a 89 render_table,
eedb7ba5 90 replace_extension,
8b0d7497 91 RejectedVideoReached,
ce02ed60
PH
92 SameFileError,
93 sanitize_filename,
1bb5c511 94 sanitize_path,
dcf77cf1 95 sanitize_url,
67dda517 96 sanitized_Request,
e5660ee6 97 std_headers,
1211bb6d 98 str_or_none,
e29663c6 99 strftime_or_none,
ce02ed60 100 subtitles_filename,
732044af 101 to_high_limit_path,
a439a3a4 102 traverse_dict,
ce02ed60 103 UnavailableVideoError,
29eb5174 104 url_basename,
58b1f00d 105 version_tuple,
ce02ed60
PH
106 write_json_file,
107 write_string,
1bab3437 108 YoutubeDLCookieJar,
6a3f4c3f 109 YoutubeDLCookieProcessor,
dca08720 110 YoutubeDLHandler,
fca6dba8 111 YoutubeDLRedirectHandler,
f5b1bca9 112 process_communicate_or_kill,
ce02ed60 113)
a0e07d31 114from .cache import Cache
52a8a1e1 115from .extractor import (
116 gen_extractor_classes,
117 get_info_extractor,
118 _LAZY_LOADER,
119 _PLUGIN_CLASSES
120)
4c54b89e 121from .extractor.openload import PhantomJSwrapper
52a8a1e1 122from .downloader import (
123 get_suitable_downloader,
124 shorten_protocol_name
125)
4c83c967 126from .downloader.rtmp import rtmpdump_version
4f026faf 127from .postprocessor import (
f17f8651 128 FFmpegFixupM3u8PP,
62cd676c 129 FFmpegFixupM4aPP,
6271f1ca 130 FFmpegFixupStretchedPP,
4f026faf
PH
131 FFmpegMergerPP,
132 FFmpegPostProcessor,
0202b52a 133 # FFmpegSubtitlesConvertorPP,
4f026faf 134 get_postprocessor,
0202b52a 135 MoveFilesAfterDownloadPP,
4f026faf 136)
dca08720 137from .version import __version__
8222d8de 138
e9c0cdd3
YCH
139if compat_os_name == 'nt':
140 import ctypes
141
2459b6e1 142
8222d8de
JMF
143class YoutubeDL(object):
144 """YoutubeDL class.
145
146 YoutubeDL objects are the ones responsible for downloading the
147 actual video file and writing it to disk if the user has requested
148 it, among some other tasks. In most cases there should be one per
149 program. As, given a video URL, the downloader doesn't know how to
150 extract all the needed information, task that InfoExtractors do, it
151 has to pass the URL to one of them.
152
153 For this, YoutubeDL objects have a method that allows
154 InfoExtractors to be registered in a given order. When it is passed
155 a URL, the YoutubeDL object handles it to the first InfoExtractor it
156 finds that reports being able to handle it. The InfoExtractor extracts
157 all the information about the video or videos the URL refers to, and
158 YoutubeDL process the extracted information, possibly using a File
159 Downloader to download the video.
160
161 YoutubeDL objects accept a lot of parameters. In order not to saturate
162 the object constructor with arguments, it receives a dictionary of
163 options instead. These options are available through the params
164 attribute for the InfoExtractors to use. The YoutubeDL also
165 registers itself as the downloader in charge for the InfoExtractors
166 that are added to it, so this is a "mutual registration".
167
168 Available options:
169
170 username: Username for authentication purposes.
171 password: Password for authentication purposes.
180940e0 172 videopassword: Password for accessing a video.
1da50aa3
S
173 ap_mso: Adobe Pass multiple-system operator identifier.
174 ap_username: Multiple-system operator account username.
175 ap_password: Multiple-system operator account password.
8222d8de
JMF
176 usenetrc: Use netrc for authentication instead.
177 verbose: Print additional info to stdout.
178 quiet: Do not print messages to stdout.
ad8915b7 179 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
180 forceurl: Force printing final URL.
181 forcetitle: Force printing title.
182 forceid: Force printing ID.
183 forcethumbnail: Force printing thumbnail URL.
184 forcedescription: Force printing description.
185 forcefilename: Force printing final filename.
525ef922 186 forceduration: Force printing duration.
8694c600 187 forcejson: Force printing info_dict as JSON.
63e0be34
PH
188 dump_single_json: Force printing the info_dict of the whole playlist
189 (or video) as a single JSON line.
c25228e5 190 force_write_download_archive: Force writing download archive regardless
191 of 'skip_download' or 'simulate'.
8222d8de 192 simulate: Do not download the video files.
eb8a4433 193 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 194 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 195 ignore_no_formats_error: Ignore "No video formats" error. Useful for
196 extracting metadata even if the video is not actually
197 available for download (experimental)
c25228e5 198 format_sort: How to sort the video formats. see "Sorting Formats"
199 for more details.
200 format_sort_force: Force the given format_sort. see "Sorting Formats"
201 for more details.
202 allow_multiple_video_streams: Allow multiple video streams to be merged
203 into a single file
204 allow_multiple_audio_streams: Allow multiple audio streams to be merged
205 into a single file
4524baf0 206 paths: Dictionary of output paths. The allowed keys are 'home'
207 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 208 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 209 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
210 A string is also accepted for backward compatibility
a820dc72
RA
211 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
212 restrictfilenames: Do not allow "&" and spaces in file names
213 trim_file_name: Limit length of filename (extension excluded)
4524baf0 214 windowsfilenames: Force the filenames to be windows compatible
a820dc72 215 ignoreerrors: Do not stop on download errors
7a5c1cfe 216 (Default True when running yt-dlp,
a820dc72 217 but False when directly accessing YoutubeDL class)
26e2805c 218 skip_playlist_after_errors: Number of allowed failures until the rest of
219 the playlist is skipped
d22dec74 220 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 221 overwrites: Overwrite all video and metadata files if True,
222 overwrite only non-video files if None
223 and don't overwrite any file if False
8222d8de
JMF
224 playliststart: Playlist item to start at.
225 playlistend: Playlist item to end at.
c14e88f0 226 playlist_items: Specific indices of playlist to download.
ff815fe6 227 playlistreverse: Download playlist items in reverse order.
75822ca7 228 playlistrandom: Download playlist items in random order.
8222d8de
JMF
229 matchtitle: Download only matching titles.
230 rejecttitle: Reject downloads for matching titles.
8bf9319e 231 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
232 logtostderr: Log messages to stderr instead of stdout.
233 writedescription: Write the video description to a .description file
234 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 235 clean_infojson: Remove private fields from the infojson
06167fbb 236 writecomments: Extract video comments. This will not be written to disk
237 unless writeinfojson is also given
1fb07d10 238 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 239 writethumbnail: Write the thumbnail image to a file
c25228e5 240 allow_playlist_files: Whether to write playlists' description, infojson etc
241 also to disk when using the 'write*' options
ec82d85a 242 write_all_thumbnails: Write all thumbnail formats to files
732044af 243 writelink: Write an internet shortcut file, depending on the
244 current platform (.url/.webloc/.desktop)
245 writeurllink: Write a Windows internet shortcut file (.url)
246 writewebloclink: Write a macOS internet shortcut file (.webloc)
247 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 248 writesubtitles: Write the video subtitles to a file
741dd8ea 249 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 250 allsubtitles: Deprecated - Use subtitlelangs = ['all']
251 Downloads all the subtitles of the video
0b7f3118 252 (requires writesubtitles or writeautomaticsub)
8222d8de 253 listsubtitles: Lists all available subtitles for the video
a504ced0 254 subtitlesformat: The format code for subtitles
c32b0aab 255 subtitleslangs: List of languages of the subtitles to download (can be regex).
256 The list may contain "all" to refer to all the available
257 subtitles. The language can be prefixed with a "-" to
258 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
259 keepvideo: Keep the video file after post-processing
260 daterange: A DateRange object, download only if the upload_date is in the range.
261 skip_download: Skip the actual download of the video file
c35f9e72 262 cachedir: Location of the cache files in the filesystem.
a0e07d31 263 False to disable filesystem cache.
47192f92 264 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
265 age_limit: An integer representing the user's age in years.
266 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
267 min_views: An integer representing the minimum view count the video
268 must have in order to not be skipped.
269 Videos without view count information are always
270 downloaded. None for no limit.
271 max_views: An integer representing the maximum view count.
272 Videos that are more popular than that are not
273 downloaded.
274 Videos without view count information are always
275 downloaded. None for no limit.
276 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
277 Videos already present in the file are not downloaded
278 again.
8a51f564 279 break_on_existing: Stop the download process after attempting to download a
280 file that is in the archive.
281 break_on_reject: Stop the download process when encountering a video that
282 has been filtered out.
283 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 284 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
285 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
286 At the moment, this is only supported by YouTube.
a1ee09e8 287 proxy: URL of the proxy server to use
38cce791 288 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 289 on geo-restricted sites.
e344693b 290 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
291 bidi_workaround: Work around buggy terminals without bidirectional text
292 support, using fribidi
a0ddb8a2 293 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 294 include_ads: Download ads as well
04b4d394
PH
295 default_search: Prepend this string if an input url is not valid.
296 'auto' for elaborate guessing
62fec3b2 297 encoding: Use this encoding instead of the system-specified.
e8ee972c 298 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
299 Pass in 'in_playlist' to only show this behavior for
300 playlist items.
4f026faf 301 postprocessors: A list of dictionaries, each with an entry
71b640cc 302 * key: The name of the postprocessor. See
7a5c1cfe 303 yt_dlp/postprocessor/__init__.py for a list.
56d868db 304 * when: When to run the postprocessor. Can be one of
305 pre_process|before_dl|post_process|after_move.
306 Assumed to be 'post_process' if not given
ab8e5e51
AM
307 post_hooks: A list of functions that get called as the final step
308 for each video file, after all postprocessors have been
309 called. The filename will be passed as the only argument.
71b640cc
PH
310 progress_hooks: A list of functions that get called on download
311 progress, with a dictionary with the entries
5cda4eda 312 * status: One of "downloading", "error", or "finished".
ee69b99a 313 Check this first and ignore unknown values.
71b640cc 314
5cda4eda 315 If status is one of "downloading", or "finished", the
ee69b99a
PH
316 following properties may also be present:
317 * filename: The final filename (always present)
5cda4eda 318 * tmpfilename: The filename we're currently writing to
71b640cc
PH
319 * downloaded_bytes: Bytes on disk
320 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
321 * total_bytes_estimate: Guess of the eventual file size,
322 None if unavailable.
323 * elapsed: The number of seconds since download started.
71b640cc
PH
324 * eta: The estimated time in seconds, None if unknown
325 * speed: The download speed in bytes/second, None if
326 unknown
5cda4eda
PH
327 * fragment_index: The counter of the currently
328 downloaded video fragment.
329 * fragment_count: The number of fragments (= individual
330 files that will be merged)
71b640cc
PH
331
332 Progress hooks are guaranteed to be called at least once
333 (with status "finished") if the download is successful.
45598f15 334 merge_output_format: Extension to use when merging formats.
6b591b29 335 final_ext: Expected final extension; used to detect when the file was
336 already downloaded and converted. "merge_output_format" is
337 replaced by this extension when given
6271f1ca
PH
338 fixup: Automatically correct known faults of the file.
339 One of:
340 - "never": do nothing
341 - "warn": only emit a warning
342 - "detect_or_warn": check whether we can do anything
62cd676c 343 about it, warn otherwise (default)
504f20dd 344 source_address: Client-side IP address to bind to.
6ec6cb4e 345 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 346 yt-dlp servers for debugging. (BROKEN)
1cf376f5 347 sleep_interval_requests: Number of seconds to sleep between requests
348 during extraction
7aa589a5
S
349 sleep_interval: Number of seconds to sleep before each download when
350 used alone or a lower bound of a range for randomized
351 sleep before each download (minimum possible number
352 of seconds to sleep) when used along with
353 max_sleep_interval.
354 max_sleep_interval:Upper bound of a range for randomized sleep before each
355 download (maximum possible number of seconds to sleep).
356 Must only be used along with sleep_interval.
357 Actual sleep time will be a random float from range
358 [sleep_interval; max_sleep_interval].
1cf376f5 359 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
360 listformats: Print an overview of available video formats and exit.
361 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
362 match_filter: A function that gets called with the info_dict of
363 every video.
364 If it returns a message, the video is ignored.
365 If it returns None, the video is downloaded.
366 match_filter_func in utils.py is one example for this.
7e5db8c9 367 no_color: Do not emit color codes in output.
0a840f58 368 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 369 HTTP header
0a840f58 370 geo_bypass_country:
773f291d
S
371 Two-letter ISO 3166-1 alpha-2 country code that will be used for
372 explicit geographic restriction bypassing via faking
504f20dd 373 X-Forwarded-For HTTP header
5f95927a
S
374 geo_bypass_ip_block:
375 IP range in CIDR notation that will be used similarly to
504f20dd 376 geo_bypass_country
71b640cc 377
85729c51 378 The following options determine which downloader is picked:
52a8a1e1 379 external_downloader: A dictionary of protocol keys and the executable of the
380 external downloader to use for it. The allowed protocols
381 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
382 Set the value to 'native' to use the native downloader
383 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
384 or {'m3u8': 'ffmpeg'} instead.
385 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
386 if True, otherwise use ffmpeg/avconv if False, otherwise
387 use downloader suggested by extractor if None.
fe7e0c98 388
8222d8de 389 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 390 the downloader (see yt_dlp/downloader/common.py):
8222d8de 391 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 392 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 393 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 394 http_chunk_size.
76b1bd67
JMF
395
396 The following options are used by the post processors:
d4a24f40 397 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 398 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
399 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
400 to the binary or its containing directory.
43820c03 401 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
402 and a list of additional command-line arguments for the
403 postprocessor/executable. The dict can also have "PP+EXE" keys
404 which are used when the given exe is used by the given PP.
405 Use 'default' as the name for arguments to be passed to all PP
e409895f 406
407 The following options are used by the extractors:
62bff2c1 408 extractor_retries: Number of times to retry for known errors
409 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 410 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 411 discontinuities such as ad breaks (default: False)
3600fd59 412 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 413 data will be downloaded and processed by extractor.
414 You can reduce network I/O by disabling it if you don't
415 care about DASH. (only for youtube)
e409895f 416 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 417 data will be downloaded and processed by extractor.
418 You can reduce network I/O by disabling it if you don't
419 care about HLS. (only for youtube)
8222d8de
JMF
420 """
421
c9969434
S
422 _NUMERIC_FIELDS = set((
423 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
424 'timestamp', 'upload_year', 'upload_month', 'upload_day',
425 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
426 'average_rating', 'comment_count', 'age_limit',
427 'start_time', 'end_time',
428 'chapter_number', 'season_number', 'episode_number',
429 'track_number', 'disc_number', 'release_year',
430 'playlist_index',
431 ))
432
8222d8de
JMF
433 params = None
434 _ies = []
56d868db 435 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
0202b52a 436 __prepare_filename_warned = False
1cf376f5 437 _first_webpage_request = True
8222d8de
JMF
438 _download_retcode = None
439 _num_downloads = None
30a074c2 440 _playlist_level = 0
441 _playlist_urls = set()
8222d8de
JMF
442 _screen_file = None
443
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params    -- dictionary of options (see the class docstring). It is
                 merged on top of the built-in defaults into self.params.
    auto_init -- when true, print the debug header and register the
                 default info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Honour the old option only when the new one was not given
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None means "not specified": drop the key altogether
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        master = slave = None
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            # The workaround is being abandoned, so release the pty pair
            # instead of leaking both descriptors for the process lifetime.
            for fd in (master, slave):
                if fd is None:
                    continue
                try:
                    os.close(fd)
                except OSError:
                    pass
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        if self.params.get('verbose'):
            self._write_string('[debug] Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error: there is nothing to preload
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Work on a copy so the caller's dictionary is left untouched
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        when = pp_def.pop('when', 'post_process')
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
575
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a video ID starting with a dash.

    Every item of argv consisting of a dash followed by exactly ten ID
    characters is treated as such an ID. When at least one is found, a
    warning is reported that shows the command line rewritten with those
    items moved after a '--' separator.
    """
    # short YouTube ID starting with dash?
    dash_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    positions = [pos for pos, arg in enumerate(argv) if dash_id.match(arg)]
    if not positions:
        return
    kept = [arg for pos, arg in enumerate(argv) if pos not in positions]
    moved = [argv[pos] for pos in positions]
    suggestion = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggestion))
591
8222d8de
JMF
592 def add_info_extractor(self, ie):
593 """Add an InfoExtractor object to the end of the list."""
594 self._ies.append(ie)
e52d7f85
JMF
595 if not isinstance(ie, type):
596 self._ies_instances[ie.ie_key()] = ie
597 ie.set_downloader(self)
8222d8de 598
56c73665
JMF
599 def get_info_extractor(self, ie_key):
600 """
601 Get an instance of an IE with name ie_key, it will try to get one from
602 the _ies list, if there's no instance it will create a new one and add
603 it to the extractor list.
604 """
605 ie = self._ies_instances.get(ie_key)
606 if ie is None:
607 ie = get_info_extractor(ie_key)()
608 self.add_info_extractor(ie)
609 return ie
610
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every extractor class produced by gen_extractor_classes() to the
    end of the list, in the order they are produced
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
617
56d868db 618 def add_post_processor(self, pp, when='post_process'):
8222d8de 619 """Add a PostProcessor object to the end of the chain."""
5bfa4862 620 self._pps[when].append(pp)
8222d8de
JMF
621 pp.set_downloader(self)
622
ab8e5e51
AM
623 def add_post_hook(self, ph):
624 """Add the post hook"""
625 self._post_hooks.append(ph)
626
933605d7
JMF
627 def add_progress_hook(self, ph):
628 """Add the progress hook (currently only for the file downloader)"""
629 self._progress_hooks.append(ph)
8ab470f1 630
1c088fa8 631 def _bidi_workaround(self, message):
5d681e96 632 if not hasattr(self, '_output_channel'):
1c088fa8
PH
633 return message
634
5d681e96 635 assert hasattr(self, '_output_process')
11b85ce6 636 assert isinstance(message, compat_str)
6febd1c1
PH
637 line_count = message.count('\n') + 1
638 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 639 self._output_process.stdin.flush()
6febd1c1 640 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 641 for _ in range(line_count))
6febd1c1 642 return res[:-len('\n')]
1c088fa8 643
8222d8de 644 def to_screen(self, message, skip_eol=False):
0783b09b 645 """Print message to stdout if not in quiet mode."""
848887eb 646 return self.to_stdout(
647 message, skip_eol,
648 quiet=self.params.get('quiet', False))
0783b09b 649
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 652
848887eb 653 def to_stdout(self, message, skip_eol=False, quiet=False):
8222d8de 654 """Print message to stdout if not in quiet mode."""
8bf9319e 655 if self.params.get('logger'):
43afe285 656 self.params['logger'].debug(message)
848887eb 657 elif not quiet:
1c088fa8 658 message = self._bidi_workaround(message)
6febd1c1 659 terminator = ['\n', ''][skip_eol]
8222d8de 660 output = message + terminator
1c088fa8 661
734f90bb 662 self._write_string(output, self._screen_file)
8222d8de
JMF
663
def to_stderr(self, message):
    """Send message to the configured logger at error level, or write it,
    followed by a newline, to the error file when no logger is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    message = self._bidi_workaround(message)
    self._write_string(message + '\n', self._err_file)
8222d8de 673
1e5b9a95
PH
674 def to_console_title(self, message):
675 if not self.params.get('consoletitle', False):
676 return
4bede0d8
C
677 if compat_os_name == 'nt':
678 if ctypes.windll.kernel32.GetConsoleWindow():
679 # c_wchar_p() might not be necessary if `message` is
680 # already of type unicode()
681 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 682 elif 'TERM' in os.environ:
b46696bd 683 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 684
bdde425c
PH
685 def save_console_title(self):
686 if not self.params.get('consoletitle', False):
687 return
94c3442e
S
688 if self.params.get('simulate', False):
689 return
4bede0d8 690 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 691 # Save the title on stack
734f90bb 692 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
693
694 def restore_console_title(self):
695 if not self.params.get('consoletitle', False):
696 return
94c3442e
S
697 if self.params.get('simulate', False):
698 return
4bede0d8 699 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 700 # Restore the title from stack
734f90bb 701 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
f89197d7 709
dca08720 710 if self.params.get('cookiefile') is not None:
1bab3437 711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 712
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well: `tb` when given, otherwise one built
    from the exception being handled or from the current call stack.

    Unless the 'ignoreerrors' parameter is set, a DownloadError is then
    raised; otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    current = sys.exc_info()
    exc_value = current[1]
    # Some exceptions carry the exc_info of the error that caused them
    has_nested = bool(
        current[0] and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0])

    if self.params.get('verbose'):
        if tb is None:
            if current[0]:  # if .trouble has been called from an except block
                pieces = []
                if has_nested:
                    pieces.append(''.join(traceback.format_exception(*exc_value.exc_info)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return
    raise DownloadError(message, exc_value.exc_info if has_nested else current)
742
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    When a 'logger' parameter is set, the message is passed to its
    warning() method instead. The 'no_warnings' parameter suppresses the
    stderr output.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
759
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
771
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded: fall back to a generic message
        self.to_screen('[download] The file has already been downloaded')
8222d8de 778
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        detailed = 'Deleting existing file %s' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded: fall back to a generic message
        self.to_screen('Deleting existing file')
0c3d0f51 785
def parse_outtmpl(self):
    """Return the 'outtmpl' parameter as a dict of output templates.

    A non-dict value is wrapped as the 'default' template. Every entry of
    DEFAULT_OUTTMPL that is missing or empty is filled in. A warning is
    emitted for each template given as bytes.

    NOTE: when the parameter already is a dict, it is updated in place.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for name, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(name):
            templates[name] = fallback
    for template in templates.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
799
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    @param outtmpl    The output template string
    @param info_dict  The dict whose fields are referenced by the template
    @param sanitize   Optional callable (key, value) -> value applied to
                      string values; defaults to the identity
    @returns          (outtmpl, template_dict) where the template may have
                      been rewritten and template_dict yields the
                      'outtmpl_na_placeholder' for missing keys
    """
    template_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    # duration_string
    template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-')
        if info_dict.get('duration', None) is not None
        else None)

    # epoch
    template_dict['epoch'] = int(time.time())

    # autonumber
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
        autonumber_size = 5
    template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

    # resolution if not defined
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '%dx?' % template_dict['width']

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
        'autonumber': autonumber_size,
    }
    FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
    # Look up the padding width per occurrence: a template may contain both
    # fields, and each one must get its own width (not the first match's)
    outtmpl = re.sub(
        FIELD_SIZE_COMPAT_RE,
        lambda m: '%%(%s)0%dd' % (m.group('field'), field_size_compat_map[m.group('field')]),
        outtmpl)

    numeric_fields = list(self._NUMERIC_FIELDS)
    if sanitize is None:
        sanitize = lambda k, v: v

    EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{0})
        (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(FIELD_RE))
    MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
        final_key = outer_mobj.group('key')
        str_type = outer_mobj.group('type')
        value = None
        mobj = re.match(INTERNAL_FORMAT_RE, final_key)
        if mobj is not None:
            mobj = mobj.groupdict()
            # Object traversal
            fields = mobj['fields'].split('.')
            value = traverse_dict(template_dict, fields)
            # Negative
            if mobj['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            if mobj['maths']:
                value = float_or_none(value)
                math_op = None
                # The split yields alternating operator / operand items
                for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
                    if item == '':
                        value = None
                    if value is None:
                        break
                    if math_op:
                        # A leading '-' on the operand negates it
                        item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                        offset = float_or_none(item)
                        if offset is None:
                            # Not a number: treat the operand as another field
                            offset = float_or_none(traverse_dict(template_dict, item.split('.')))
                        try:
                            value = math_op(value, multiplier * offset)
                        except (TypeError, ZeroDivisionError):
                            value = None
                        math_op = None
                    else:
                        math_op = MATH_FUNCTIONS[item]
            # Datetime formatting
            if mobj['strf_format']:
                value = strftime_or_none(value, mobj['strf_format'])
            # Set default
            if value is None and mobj['default'] is not None:
                value = mobj['default']
            # Sanitize
            if str_type in 'crs' and value is not None:  # string
                value = sanitize('%{}'.format(str_type) % fields[-1], value)
            else:  # numeric
                numeric_fields.append(final_key)
                value = float_or_none(value)
            if value is not None:
                template_dict[final_key] = value

    # Missing numeric fields used together with integer presentation types
    # in format specification will break the argument substitution since
    # string NA placeholder is returned for missing fields. We will patch
    # output template for missing fields to meet string presentation type.
    for numeric_field in numeric_fields:
        if template_dict.get(numeric_field) is None:
            outtmpl = re.sub(
                FORMAT_RE.format(re.escape(numeric_field)),
                r'%({0})s'.format(numeric_field), outtmpl)

    template_dict = collections.defaultdict(lambda: na, (
        (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
        for k, v in template_dict.items() if v is not None))
    return outtmpl, template_dict
928
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Build the file name for `info_dict` from the output template of `tmpl_type`.

    Falls back to the 'default' template when `tmpl_type` has none.
    Returns None (after reporting an error) when template substitution
    raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, template_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing extensions so that the stem is
            # the only part trimmed. Bounding the split keeps dots inside the
            # stem intact and avoids duplicating a name that has no dot.
            fn_groups = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name]] + fn_groups[1:]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
969
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The name built by _prepare_filename() is joined onto the 'home' path
    and, when `dir_type` is given, onto that type's sub-directory from the
    'paths' parameter. '-' (stdout) and empty/None names are returned
    unchanged. With `warn` set, a one-time warning is issued when 'paths'
    cannot be honoured.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout')
        # filename is None when the template could not be evaluated;
        # os.path.isabs(None) would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
0202b52a 999
def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded

    Otherwise returns a string with the reason for skipping the entry,
    after printing it. Raises ExistingVideoReached when the entry is in
    the download archive and 'break_on_existing' is set, or
    RejectedVideoReached when the entry is rejected and 'break_on_reject'
    is set. The 'match_filter' callable is only consulted when
    `incomplete` is false.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_metadata():
        """Return the reason the metadata filters reject the entry, if any"""
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        return None

    reason = check_metadata()

    # Track the archive hit with a flag instead of matching on the wording
    # of the message, which is what made 'break_on_existing' ineffective
    in_archive = False
    if reason is None and self.in_download_archive(info_dict):
        in_archive = True
        reason = '%s has already been recorded in archive' % video_title

    if reason is None and not incomplete:
        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            reason = match_filter(info_dict)

    if reason is not None:
        self.to_screen('[download] ' + reason)
        if in_archive and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
    return reason
fe7e0c98 1050
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
1056
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for `url` with the first suitable extractor
    and return what that extraction returns.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    Returns None when the video id is already recorded in the download
    archive. Reports an error when no extractor is suitable for the URL.
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to get the video id without extracting, so that archived
        # videos can be skipped early
        try:
            if callable(getattr(ie, 'extract_id', None)):
                temp_id = str_or_none(ie.extract_id(url))
            else:
                temp_id = str_or_none(ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            return None
        return self.__extract_info(url, ie, download, extra_info, process)

    # Only reached when no extractor accepted the URL
    self.report_error('no suitable InfoExtractor for URL %s' % url)
1096
def __handle_extraction_exceptions(func):
    """Decorator: report extraction errors of `func` instead of raising them.

    GeoRestrictedError and ExtractorError are reported through
    report_error(). MaxDownloadsReached, ExistingVideoReached and
    RejectedVideoReached always propagate. Any other exception is reported
    when 'ignoreerrors' is set and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            lines = [e.msg]
            if e.countries:
                lines.append('This video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries)))
            lines.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error('\n'.join(lines))
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1118
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process its result.

    Returns None when the extractor returns None, the raw result when
    `process` is false, and the value of process_ie_result() otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1135
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields of `ie_result` if they are missing"""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1143
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Returns None for a playlist whose webpage_url is already being
    processed. Raises Exception for an unknown '_type'.
    NOTE: `extra_info` is only read and passed on by this method.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so
            # without them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1244
def _ensure_dir_exists(self, path):
    """Call make_dir() for `path` with report_error as its error callback and return its result"""
    on_error = self.report_error
    return make_dir(path, on_error)
1247
def __process_playlist(self, ie_result, download):
    """Select, optionally write metadata for, and process the entries of a playlist.

    The entries are narrowed down according to the 'playliststart',
    'playlistend' and 'playlist_items' parameters, then each one is passed
    through _match_entry() and processed. `ie_result` is updated in place
    ('entries', 'requested_entries') and returned.

    Returns None when a playlist metadata file cannot be prepared/written.
    Raises EntryNotInPlaylist when the result has no 'entries' or when a
    requested entry is not available.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Put every entry at its 1-based playlist position and leave
            # None in the gaps. max(indexes), not max(*indexes): the
            # latter raises TypeError for a single index
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1) - 1
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # `spec` is a comma-separated list of indexes and 'start-end' ranges
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']

    def make_playlistitems_entries(list_ie_entries):
        num_entries = len(list_ie_entries)
        for i in playlistitems:
            if -num_entries < i <= num_entries:
                yield list_ie_entries[i - 1]
            elif incomplete_entries:
                raise EntryNotInPlaylist()

    if isinstance(ie_entries, list):
        n_all_entries = len(ie_entries)
        if playlistitems:
            entries = list(make_playlistitems_entries(ie_entries))
        else:
            entries = ie_entries[playliststart:playlistend]
        n_entries = len(entries)
        msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
    elif isinstance(ie_entries, PagedList):
        if playlistitems:
            entries = []
            for item in playlistitems:
                entries.extend(ie_entries.getslice(
                    item - 1, item
                ))
        else:
            entries = ie_entries.getslice(
                playliststart, playlistend)
        n_entries = len(entries)
        msg = 'Downloading %d videos' % n_entries
    else:  # iterable
        if playlistitems:
            entries = list(make_playlistitems_entries(list(itertools.islice(
                ie_entries, 0, max(playlistitems)))))
        else:
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        msg = 'Downloading %d videos' % n_entries

    # A None entry is a gap left by fill_missing_entries
    if any((entry is None for entry in entries)):
        raise EntryNotInPlaylist()
    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
    ie_result['entries'] = entries
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
    failures = 0
    # No limit on failed entries unless 'skip_playlist_after_errors' is set
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry in enumerate(entries, 1):
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': playlistitems[i - 1] if playlistitems else i,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1419
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry through process_ie_result() under the extraction error handler"""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1424
67134eab
JMF
def _build_format_filter(self, filter_spec):
    " Returns a function to filter the formats according to the filter_spec "

    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    if m:
        # Numeric comparison: a plain integer, or a size with an optional
        # unit suffix
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = OPERATORS[m.group('op')]
    else:
        # String comparison, optionally negated with a leading '!'
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)
        comparison_value = m.group('value')
        str_op = STR_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not str_op(attr, value)
        else:
            op = str_op

    key = m.group('key')
    none_inclusive = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            # Formats lacking the field pass only when the spec has a '?'
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter
1487
def _default_format_spec(self, info_dict, download=True):
    """Return the format selector expression used when none was requested.

    A pre-merged 'best' is preferred when an actual download is going to
    happen and either the formats cannot be merged, the video is live, or
    the default output template is '-'. Otherwise separate video and audio
    are preferred, with the exact expression depending on the
    'allow_multiple_audio_streams' parameter.
    """

    def merging_unavailable():
        ffmpeg_merger = FFmpegMergerPP(self)
        return not (ffmpeg_merger.available and ffmpeg_merger.can_merge())

    will_download = not self.params.get('simulate', False) and download

    # The merger is only probed when a download will really take place
    if will_download and (
            merging_unavailable()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-'):
        return 'best/bestvideo+bestaudio'

    if self.params.get('allow_multiple_audio_streams', False):
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
0017d9ad 1508
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selector expression into a selector function.

    format_spec is an expression such as 'bestvideo+bestaudio/best' or
    'mp4[height<=720],worst'. It is tokenized, parsed into a tree of
    FormatSelector tuples and turned into a function.

    The returned function takes a context dict with the keys 'formats'
    (list of format dicts) and 'incomplete_formats' (bool) and returns an
    iterable of the selected (possibly merged) format dicts.

    Raises SyntaxError when the expression cannot be parsed.
    """
    def syntax_error(note, start):
        # start is a (row, column) pair; the column positions the '^' marker
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Collect the raw text of a '[...]' filter up to the closing bracket.
        # Implicitly returns None if the tokens run out before a ']' is seen.
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # NOTE: last_line is never reassigned, so joined tokens carry None as their line
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; returns a list of FormatSelector tuples.
        # The inside_* flags tell a nested call which operators belong to the
        # caller and therefore have to be handed back via restore_last_token().
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter without a preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first format providing each restricted stream type.
            # A new list is built instead of popping from the list being
            # iterated, which used to skip elements and could pop twice
            # (IndexError) for a format carrying both audio and video.
            seen = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                restricted = [
                    aud_vid for aud_vid in ('audio', 'video')
                    if not allow_multiple_streams[aud_vid]
                    and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                if any(seen[aud_vid] for aud_vid in restricted):
                    continue
                for aud_vid in restricted:
                    seen[aud_vid] = True
                kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Lazily yield only the formats for which a test download succeeds
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            paths = self.params.get('paths', {})
            temp_file = os.path.join(
                expand_path(paths.get('home', '').strip()),
                expand_path(paths.get('temp', '').strip()),
                'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
            try:
                success, _ = self.dl(temp_file, f, test=True)
            finally:
                # Never leave the probe file behind, even if the download raised
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function of ctx
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if check_formats:
                        formats = _check_formats(formats)
                    for f in formats:
                        yield f
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    # Only probe the formats when the user asked for it,
                    # consistent with the other selectors
                    if check_formats:
                        formats = list(_check_formats(formats))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else None)  # b*, w*
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    if format_reverse:
                        matches = matches[::-1]
                    if check_formats:
                        # Stop probing as soon as enough working formats were found
                        matches = list(itertools.islice(_check_formats(matches), format_idx))
                    n = len(matches)
                    if -n <= format_idx - 1 < n:
                        yield matches[format_idx - 1]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the '[...]' filters on a copy so the caller's ctx is untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over the token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1833
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the given info dict.

    Starts from a copy of std_headers, overlays the 'http_headers' of
    info_dict, sets the 'Cookie' header when cookies apply and adds an
    'X-Forwarded-For' header from '__x_forwarded_for_ip' unless such a
    header is already present.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1851
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookiejar adds for info_dict['url']."""
    # A throwaway request lets the cookiejar compute the matching cookies
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
e5660ee6 1856
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and process them.

    info_dict is modified in place: string/numeric fields are coerced,
    thumbnails and subtitles are normalized, the formats get their missing
    fields filled in and the last selected format is merged into info_dict.
    When download is true, every selected format is passed to
    self.process_info().

    Returns info_dict, or None when one of the listing parameters
    ('list_thumbnails', 'listsubtitles', 'listformats') is set.
    Raises ExtractorError when 'id' or 'title' is missing, when no formats
    are found, or when the requested format is not available (the latter
    two become warnings if 'ignore_no_formats_error' is set).
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        # Warn that an extractor returned a field of an unexpected type
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce info[string_field] to compat_str (with a warning) if needed
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Coerce every field listed in self._NUMERIC_FIELDS via int_or_none
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        # Build a one-element thumbnail list from the single 'thumbnail' URL
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Sort ascending by preference, width, height, id and url, with
        # missing values ordered first; the last entry is used as default below
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                # Fall back to the position in the sorted list as id
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive the YYYYMMDD date fields from their timestamps when missing
    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                info_dict[date_key] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    # Sanitize subtitle URLs and fill in missing extensions from the URL
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        # NOTE(review): reads 'url' unconditionally - a subtitle
                        # with neither 'ext' nor 'url' would raise KeyError here
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('No video formats found!')
        else:
            self.report_warning('No video formats found!')

    def is_wellformed(f):
        # A format is usable only if it has a non-empty 'url'
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Maps each format_id to the formats sharing it, to detect duplicates
    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_screen('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('Requested format is not available', expected=True)
        else:
            self.report_warning('Requested format is not available')
    elif download:
        self.to_screen(
            '[info] %s: Downloading format(s) %s'
            % (info_dict['id'], ", ".join([f['format_id'] for f in formats_to_download])))
        if len(formats_to_download) > 1:
            self.to_screen(
                '[info] %s: Downloading video in %s formats'
                % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            # Each selected format is processed on its own copy of info_dict
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2109
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each with at least an 'ext' key). Automatic
    captions are only used for languages without normal subtitles.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    Languages without any format are skipped with a warning.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # available_subs can only be populated when subtitles were requested,
    # so an empty dict covers both "not requested" and "nothing available"
    if not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        requested_langs = set()
        for lang_spec in self.params.get('subtitleslangs'):
            if lang_spec == 'all':
                requested_langs.update(all_sub_langs)
                continue
            # A leading '-' removes the matching languages again.
            # startswith() (rather than indexing) tolerates an empty entry
            discard = lang_spec.startswith('-')
            if discard:
                lang_spec = lang_spec[1:]
            # Each entry is a regex that has to match the whole language code
            lang_re = re.compile(lang_spec + '$')
            current_langs = [lang for lang in all_sub_langs if lang_re.match(lang)]
            if discard:
                requested_langs.difference_update(current_langs)
            else:
                requested_langs.update(current_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [next(iter(all_sub_langs))]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing language
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions exist: fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2171
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the 'force*' parameters to stdout.

    filename is printed for 'forcefilename' unless it is None. When
    incomplete is true, missing mandatory fields and the URL are skipped.
    """
    def is_forced(option):
        return self.params.get('force%s' % option, False)

    def emit_field(field, mandatory):
        if not is_forced(field):
            return
        # A missing value is only tolerated (and then fails loudly on
        # lookup) for mandatory fields of a complete info dict
        if info_dict.get(field) is None and not (mandatory and not incomplete):
            return
        self.to_stdout(info_dict[field])

    emit_field('title', True)
    emit_field('id', True)

    if is_forced('url') and not incomplete:
        requested_formats = info_dict.get('requested_formats')
        if requested_formats is None:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        else:
            for fmt in info_dict['requested_formats']:
                self.to_stdout(fmt['url'] + fmt.get('play_path', ''))

    emit_field('thumbnail', False)
    emit_field('description', False)

    if is_forced('filename') and filename is not None:
        self.to_stdout(filename)
    if is_forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    emit_field('format', True)

    if is_forced('json'):
        self.post_extract(info_dict)
        self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2202
def dl(self, name, info, subtitle=False, test=False):
    """Run the suitable downloader for info, writing to name.

    With test=True a dedicated parameter set is used instead of
    self.params and no progress hooks are attached. Returns whatever the
    downloader's download() returns.
    """
    if not test:
        downloader_params = self.params
    else:
        is_verbose = self.params.get('verbose')
        downloader_params = {
            'test': True,
            'quiet': not is_verbose,
            'verbose': is_verbose,
            'noprogress': not is_verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, downloader_params)
    downloader = downloader_cls(self, downloader_params)

    if not test:
        for hook in self._progress_hooks:
            downloader.add_progress_hook(hook)
        if self.params.get('verbose'):
            self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))

    # Work on a copy so that the caller's info dict is left untouched
    download_info = dict(info)
    if download_info.get('http_headers') is None:
        download_info['http_headers'] = self._calc_headers(download_info)
    return downloader.download(name, download_info, subtitle)
2230
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Writes the requested side files (description, annotations, subtitles,
    info JSON, thumbnails, internet shortcuts), downloads the media unless
    the 'simulate' or 'skip_download' params are set, schedules fixup
    post-processors, runs post-processing and the post hooks, and finally
    records the entry in the download archive.

    info_dict must describe a 'video' result. Its 'title' is read
    unconditionally, and 'ext' is read when 'format' is missing.

    Always returns None; most failures are reported through
    report_error()/report_warning() followed by an early return.

    Raises MaxDownloadsReached when the 'max_downloads' limit is reached
    (checked both before and after the download) and UnavailableVideoError
    when the download fails with OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    info_dict.setdefault('__postprocessors', [])

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # Any non-None result from _match_entry means the entry is skipped
    if self._match_entry(info_dict, incomplete=False) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    info_dict, _ = self.pre_process(info_dict)

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    # Maps a file written next to the temp file to its final destination
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return

    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self.params.get('writedescription', False):
        descfn = self.prepare_filename(info_dict, 'description')
        if not self._ensure_dir_exists(encodeFilename(descfn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(
                self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                sub_info['filepath'] = sub_filename
                files_to_move[sub_filename] = sub_filename_final
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        self.dl(sub_filename, sub_info.copy(), subtitle=True)
                        sub_info['filepath'] = sub_filename
                        files_to_move[sub_filename] = sub_filename_final
                    # network_exceptions is used directly as an except target
                    # further down, which requires a tuple; tuple() makes this
                    # concatenation work whether it is a tuple or a list.
                    except (ExtractorError, IOError, OSError, ValueError) + tuple(network_exceptions) as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = self.prepare_filename(info_dict, 'infojson')
        if not self._ensure_dir_exists(encodeFilename(infofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video metadata is already present')
        else:
            self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write video metadata to JSON file ' + infofn)
                return
        info_dict['__infojson_filename'] = infofn

    for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
        thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
        thumb_filename = replace_extension(
            self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
        files_to_move[thumb_filename_temp] = thumb_filename

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        """Write one shortcut file; return False only if writing failed."""
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        # Keep an existing shortcut only when overwriting is disabled, like
        # every other side file above (the test used to be inverted).
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        # Nothing is downloaded, but the side files still have to be moved
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        try:

            def existing_file(*filepaths):
                """Return an already downloaded file to reuse, or None.

                When nothing exists, or 'overwrites' is set, any existing
                candidates are deleted and None is returned.
                """
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                self.report_file_already_downloaded(existing_files[0])
                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:
                downloaded = []
                merger = FFmpegMergerPP(self)
                if self.params.get('allow_unplayable_formats'):
                    self.report_warning(
                        'You have requested merging of multiple formats '
                        'while also allowing unplayable formats to be downloaded. '
                        'The formats won\'t be merged to prevent data corruption.')
                elif not merger.available:
                    self.report_warning(
                        'You have requested merging of multiple formats but ffmpeg is not installed. '
                        'The formats won\'t be merged.')

                def compatible_formats(formats):
                    """Tell whether the formats can share their own container."""
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    if (info_dict['ext'] == 'webm'
                            and self.params.get('writethumbnail', False)
                            and info_dict.get('thumbnails')):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                def correct_ext(filename):
                    """Swap the old extension (if present) for the merge extension."""
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == old_ext
                        else filename)
                    return '%s.%s' % (filename_wo_ext, info_dict['ext'])

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False
                if dl_filename is None:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = prepend_extension(
                            self.prepare_filename(new_info, 'temp'),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not self._ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        # No merge: the parts are registered with no destination
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None:
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'
                    and info_dict.get('ext') == 'm4a'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if ('protocol' in info_dict
                    and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2642
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns the accumulated download return code.
    """
    template = self.outtmpl_dict['default']
    if len(url_list) > 1:
        # Without any '%' field every URL would end up in the same file
        writes_fixed_file = template != '-' and '%' not in template
        if writes_fixed_file and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        # Only reached when the extraction did not raise
        if self.params.get('dump_single_json', False):
            self.post_extract(result)
            self.to_stdout(json.dumps(result, default=repr))

    return self._download_retcode
2674
def download_with_info_file(self, info_filename):
    """Process the info JSON stored in info_filename.

    If processing raises DownloadError or EntryNotInPlaylist and the info
    has a 'webpage_url', the download is retried from that URL; without
    one the error is re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2691
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return info_dict prepared for serialisation.

    With actually_filter false, the same dict is returned after its
    'epoch' key is set to the current time. Otherwise a filtered copy is
    built recursively: keys starting with '_' (except '_type') and the
    download-state keys listed below are dropped, and tuples become lists.
    """
    if not actually_filter:
        info_dict['epoch'] = int(time.time())
        return info_dict

    removed_keys = ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries']
    kept_keys = ['_type']

    def is_kept(key):
        if key in kept_keys:
            return True
        return not (key.startswith('_') or key in removed_keys)

    def clean(value):
        if isinstance(value, (list, tuple)):
            return [clean(item) for item in value]
        if not isinstance(value, dict):
            return value
        return dict((key, clean(item)) for key, item in value.items() if is_kept(key))

    return clean(info_dict)
cb202fd2 2707
def run_pp(self, pp, infodict):
    """Run one post-processor and handle the files it asks to delete.

    With the 'keepvideo' param those files are registered in
    infodict['__files_to_move'] instead of being removed. Returns the
    info dict produced by pp.run().
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for kept in obsolete_files:
            infodict['__files_to_move'].setdefault(kept, '')
        return infodict

    for old_filename in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must no longer be scheduled for moving
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict
5bfa4862 2729
@staticmethod
def post_extract(info_dict):
    """Apply the deferred '__post_extractor' callbacks in place.

    For 'playlist' and 'multi_video' results only the entries are visited;
    for anything else the callback result is merged into the dict and the
    '__post_extractor' key is removed.
    """
    def resolve(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            for entry in info.get('entries', {}):
                resolve(entry or {})
            return

        try:
            post_extractor = info['__post_extractor']
        except KeyError:
            return
        if post_extractor:
            info.update(post_extractor().items())
        del info['__post_extractor']

    resolve(info_dict or {})
277d6ff5 2747
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the post-processors registered under key on a copy of ie_info.

    Returns a (info, files_to_move) tuple; '__files_to_move' is removed
    from the returned info.
    """
    working = dict(ie_info, __files_to_move=files_to_move or {})
    for processor in self._pps[key]:
        working = self.run_pp(processor, working)
    pending_moves = working.pop('__files_to_move', None)
    return working, pending_moves
5bfa4862 2754
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info, filepath=filename, __files_to_move=files_to_move or {})

    # Post-processors queued on the entry run before the configured ones
    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info
c1c9a79c 2768
5db07df6 2769 def _make_archive_id(self, info_dict):
e9fef7ee
S
2770 video_id = info_dict.get('id')
2771 if not video_id:
2772 return
5db07df6
PH
2773 # Future-proof against any change in case
2774 # and backwards compatibility with prior versions
e9fef7ee 2775 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2776 if extractor is None:
1211bb6d
S
2777 url = str_or_none(info_dict.get('url'))
2778 if not url:
2779 return
e9fef7ee
S
2780 # Try to find matching extractor for the URL and take its ie_key
2781 for ie in self._ies:
1211bb6d 2782 if ie.suitable(url):
e9fef7ee
S
2783 extractor = ie.ie_key()
2784 break
2785 else:
2786 return
d0757229 2787 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2788
def in_download_archive(self, info_dict):
    """Tell whether the entry is already recorded in the download archive.

    Always False when no 'download_archive' is configured.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # No id means incomplete video information
    return archive_id in self.archive if archive_id else False
c1c9a79c
PH
2799
def record_download_archive(self, info_dict):
    """Append the entry to the download archive file and the in-memory set.

    Does nothing when no 'download_archive' is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2809
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution label for a format dict.

    Audio-only formats and an explicit 'resolution' take precedence;
    otherwise the label is built from 'width'/'height', falling back to
    default when neither is set.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
2825
c57f7757
PH
2826 def _format_note(self, fdict):
2827 res = ''
2828 if fdict.get('ext') in ['f4f', 'f4m']:
2829 res += '(unsupported) '
32f90364
PH
2830 if fdict.get('language'):
2831 if res:
2832 res += ' '
9016d76f 2833 res += '[%s] ' % fdict['language']
c57f7757
PH
2834 if fdict.get('format_note') is not None:
2835 res += fdict['format_note'] + ' '
2836 if fdict.get('tbr') is not None:
2837 res += '%4dk ' % fdict['tbr']
2838 if fdict.get('container') is not None:
2839 if res:
2840 res += ', '
2841 res += '%s container' % fdict['container']
3089bc74
S
2842 if (fdict.get('vcodec') is not None
2843 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2844 if res:
2845 res += ', '
2846 res += fdict['vcodec']
91c7271a 2847 if fdict.get('vbr') is not None:
c57f7757
PH
2848 res += '@'
2849 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2850 res += 'video@'
2851 if fdict.get('vbr') is not None:
2852 res += '%4dk' % fdict['vbr']
fbb21cf5 2853 if fdict.get('fps') is not None:
5d583bdf
S
2854 if res:
2855 res += ', '
2856 res += '%sfps' % fdict['fps']
c57f7757
PH
2857 if fdict.get('acodec') is not None:
2858 if res:
2859 res += ', '
2860 if fdict['acodec'] == 'none':
2861 res += 'video only'
2862 else:
2863 res += '%-5s' % fdict['acodec']
2864 elif fdict.get('abr') is not None:
2865 if res:
2866 res += ', '
2867 res += 'audio'
2868 if fdict.get('abr') is not None:
2869 res += '@%3dk' % fdict['abr']
2870 if fdict.get('asr') is not None:
2871 res += ' (%5dHz)' % fdict['asr']
2872 if fdict.get('filesize') is not None:
2873 if res:
2874 res += ', '
2875 res += format_bytes(fdict['filesize'])
9732d77e
PH
2876 elif fdict.get('filesize_approx') is not None:
2877 if res:
2878 res += ', '
2879 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2880 return res
91c7271a 2881
def _format_note_table(self, f):
    """Build the NOTE cell of the tabular format listing for format f."""
    fields = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    # Empty fields are left out so that no stray separators appear
    return ', '.join(field for field in fields if field != '')
2892
def list_formats(self, info_dict):
    """Print the available formats of info_dict as a table.

    The 'listformats_table' param selects the detailed layout; formats
    with a preference below -1000 are not listed.
    """
    new_format = self.params.get('listformats_table', False)
    formats = info_dict.get('formats', [info_dict])

    def is_listed(f):
        preference = f.get('preference')
        return preference is None or preference >= -1000

    def detailed_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            self._format_note_table(f)]

    def plain_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f)]

    if new_format:
        make_row = detailed_row
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        make_row = plain_row
        header_line = ['format code', 'extension', 'resolution', 'note']

    table = [make_row(f) for f in formats if is_listed(f)]

    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
            header_line,
            table,
            delim=new_format,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format)))
cfb56d1a
PH
2936
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails listed in info_dict."""
    video_id = info_dict
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % video_id['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % video_id['id'])

    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 2948
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages and formats of subtitles for video_id.

    name is only used in the printed messages.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    rows = []
    for lang, formats in subtitles.items():
        # The formats of each language are listed in reverse order
        exts = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(exts)])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2959
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is first turned into a request object
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2965
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; no-op unless 'verbose' is set.

    Reports encodings, the program version and how it was started, the git
    HEAD (best effort), the Python version, external program versions and
    the proxy map. With the 'call_home' param the public IP address is
    fetched and printed as well.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may be replaced by an object without an 'encoding' attribute
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # How the program was started: frozen executable, zip archive or source tree
    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    # Best effort: any failure to run git is deliberately ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # sys.exc_clear() only exists on Python 2; elsewhere the
            # resulting AttributeError is swallowed by the handler below
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Appends the PyPy version when running on PyPy
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only programs with a truthy version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the proxies of every opener handler that has any
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        return
        # NOTE(review): the version check below is unreachable because of
        # the return above; presumably disabled on purpose - confirm
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3052
def _setup_opener(self):
    """Build the urllib opener used for requests and store it on self._opener.

    Reads 'socket_timeout', 'cookiefile', 'proxy' and 'debug_printtraffic'
    from self.params. Besides self._opener, this also sets
    self._socket_timeout and self.cookiejar.
    """
    socket_timeout = self.params.get('socket_timeout')
    if socket_timeout is None:
        # No timeout configured: fall back to 600
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(socket_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        # Only load the jar when the file is actually readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        # No explicit option: use the proxies reported by urllib
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty string means an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler.file_open = file_open

    handlers = (
        proxy_handler,
        https_handler,
        cookie_processor,
        ydlh,
        redirect_handler,
        data_handler,
        file_handler,
    )
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3105
def encode(self, s):
    """Return `s` as bytes, encoding text with self.get_encoding().

    Bytes input is returned unchanged. A UnicodeEncodeError is re-raised
    after a configuration hint has been appended to its `reason`.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
3115
def get_encoding(self):
    """Return params['encoding'], or preferredencoding() when it is unset (None)."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3121
def _write_thumbnails(self, info_dict, filename):  # return the extensions
    """Write the thumbnail(s) of `info_dict` next to `filename`.

    Nothing is written unless params['write_all_thumbnails'] or
    params['writethumbnail'] is set. With 'write_all_thumbnails' every
    entry of info_dict['thumbnails'] is processed in list order; otherwise
    the list is walked in reverse and processing stops after the first
    thumbnail that is present on disk.

    Each processed thumbnail dict gets a 'filepath' key holding its target
    file name (set before the download is attempted).

    Returns a list of "<suffix><ext>" strings, one per thumbnail that
    already existed (when overwriting is disabled) or was written now.
    Download failures matching network_exceptions are reported as
    warnings and do not abort the loop.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails = []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    # The thumbnail id only goes into file names/messages when several are written
    multiple = write_all and len(thumbnails) > 1

    ret = []
    for t in thumbnails[::1 if write_all else -1]:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '%s.' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
            ret.append(suffix + thumb_ext)
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # Close the response even if copying or writing fails, so the
                # underlying connection is not leaked.
                # NOTE(review): assumes self.urlopen() returns an object with close() - confirm
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
            except network_exceptions as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], error_to_compat_str(err)))
        # A single thumbnail is enough unless all were requested
        if ret and not write_all:
            break
    return ret