]> jfr.im git - yt-dlp.git/blame - youtube_dlc/YoutubeDL.py
[version] update
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474
S
29from string import ascii_letters
30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
de6000d9 52 OUTTMPL_TYPES,
ce02ed60 53 determine_ext,
b5559424 54 determine_protocol,
732044af 55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 58 DownloadError,
c0384f22 59 encode_compat_str,
ce02ed60 60 encodeFilename,
9b9c5355 61 error_to_compat_str,
8b0d7497 62 ExistingVideoReached,
590bc6f6 63 expand_path,
ce02ed60 64 ExtractorError,
e29663c6 65 float_or_none,
02dbf93f 66 format_bytes,
76d321f6 67 format_field,
525ef922 68 formatSeconds,
773f291d 69 GeoRestrictedError,
c9969434 70 int_or_none,
732044af 71 iri_to_uri,
773f291d 72 ISO3166Utils,
ce02ed60 73 locked_file,
0202b52a 74 make_dir,
dca08720 75 make_HTTPS_handler,
ce02ed60 76 MaxDownloadsReached,
cd6fc19e 77 orderedSet,
b7ab0590 78 PagedList,
083c9df9 79 parse_filesize,
91410c9b 80 PerRequestProxyHandler,
dca08720 81 platform_name,
eedb7ba5 82 PostProcessingError,
ce02ed60 83 preferredencoding,
eedb7ba5 84 prepend_extension,
51fb4995 85 register_socks_protocols,
cfb56d1a 86 render_table,
eedb7ba5 87 replace_extension,
8b0d7497 88 RejectedVideoReached,
ce02ed60
PH
89 SameFileError,
90 sanitize_filename,
1bb5c511 91 sanitize_path,
dcf77cf1 92 sanitize_url,
67dda517 93 sanitized_Request,
e5660ee6 94 std_headers,
1211bb6d 95 str_or_none,
e29663c6 96 strftime_or_none,
ce02ed60 97 subtitles_filename,
732044af 98 to_high_limit_path,
ce02ed60 99 UnavailableVideoError,
29eb5174 100 url_basename,
58b1f00d 101 version_tuple,
ce02ed60
PH
102 write_json_file,
103 write_string,
1bab3437 104 YoutubeDLCookieJar,
6a3f4c3f 105 YoutubeDLCookieProcessor,
dca08720 106 YoutubeDLHandler,
fca6dba8 107 YoutubeDLRedirectHandler,
f5b1bca9 108 process_communicate_or_kill,
ce02ed60 109)
a0e07d31 110from .cache import Cache
f74980cb 111from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
4c54b89e 112from .extractor.openload import PhantomJSwrapper
3bc2ddcc 113from .downloader import get_suitable_downloader
4c83c967 114from .downloader.rtmp import rtmpdump_version
4f026faf 115from .postprocessor import (
f17f8651 116 FFmpegFixupM3u8PP,
62cd676c 117 FFmpegFixupM4aPP,
6271f1ca 118 FFmpegFixupStretchedPP,
4f026faf
PH
119 FFmpegMergerPP,
120 FFmpegPostProcessor,
0202b52a 121 # FFmpegSubtitlesConvertorPP,
4f026faf 122 get_postprocessor,
0202b52a 123 MoveFilesAfterDownloadPP,
4f026faf 124)
dca08720 125from .version import __version__
8222d8de 126
e9c0cdd3
YCH
127if compat_os_name == 'nt':
128 import ctypes
129
2459b6e1 130
8222d8de
JMF
131class YoutubeDL(object):
132 """YoutubeDL class.
133
134 YoutubeDL objects are the ones responsible for downloading the
135 actual video file and writing it to disk if the user has requested
136 it, among some other tasks. In most cases there should be one per
137 program. As, given a video URL, the downloader doesn't know how to
138 extract all the needed information, task that InfoExtractors do, it
139 has to pass the URL to one of them.
140
141 For this, YoutubeDL objects have a method that allows
142 InfoExtractors to be registered in a given order. When it is passed
143 a URL, the YoutubeDL object handles it to the first InfoExtractor it
144 finds that reports being able to handle it. The InfoExtractor extracts
145 all the information about the video or videos the URL refers to, and
146 YoutubeDL process the extracted information, possibly using a File
147 Downloader to download the video.
148
149 YoutubeDL objects accept a lot of parameters. In order not to saturate
150 the object constructor with arguments, it receives a dictionary of
151 options instead. These options are available through the params
152 attribute for the InfoExtractors to use. The YoutubeDL also
153 registers itself as the downloader in charge for the InfoExtractors
154 that are added to it, so this is a "mutual registration".
155
156 Available options:
157
158 username: Username for authentication purposes.
159 password: Password for authentication purposes.
180940e0 160 videopassword: Password for accessing a video.
1da50aa3
S
161 ap_mso: Adobe Pass multiple-system operator identifier.
162 ap_username: Multiple-system operator account username.
163 ap_password: Multiple-system operator account password.
8222d8de
JMF
164 usenetrc: Use netrc for authentication instead.
165 verbose: Print additional info to stdout.
166 quiet: Do not print messages to stdout.
ad8915b7 167 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
168 forceurl: Force printing final URL.
169 forcetitle: Force printing title.
170 forceid: Force printing ID.
171 forcethumbnail: Force printing thumbnail URL.
172 forcedescription: Force printing description.
173 forcefilename: Force printing final filename.
525ef922 174 forceduration: Force printing duration.
8694c600 175 forcejson: Force printing info_dict as JSON.
63e0be34
PH
176 dump_single_json: Force printing the info_dict of the whole playlist
177 (or video) as a single JSON line.
c25228e5 178 force_write_download_archive: Force writing download archive regardless
179 of 'skip_download' or 'simulate'.
8222d8de 180 simulate: Do not download the video files.
eb8a4433 181 format: Video format code. see "FORMAT SELECTION" for more details.
c25228e5 182 format_sort: How to sort the video formats. see "Sorting Formats"
183 for more details.
184 format_sort_force: Force the given format_sort. see "Sorting Formats"
185 for more details.
186 allow_multiple_video_streams: Allow multiple video streams to be merged
187 into a single file
188 allow_multiple_audio_streams: Allow multiple audio streams to be merged
189 into a single file
de6000d9 190 outtmpl: Dictionary of templates for output names. Allowed keys
191 are 'default' and the keys of OUTTMPL_TYPES (in utils.py)
a820dc72
RA
192 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
193 restrictfilenames: Do not allow "&" and spaces in file names
194 trim_file_name: Limit length of filename (extension excluded)
195 ignoreerrors: Do not stop on download errors
196 (Default True when running youtube-dlc,
197 but False when directly accessing YoutubeDL class)
d22dec74 198 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 199 overwrites: Overwrite all video and metadata files if True,
200 overwrite only non-video files if None
201 and don't overwrite any file if False
8222d8de
JMF
202 playliststart: Playlist item to start at.
203 playlistend: Playlist item to end at.
c14e88f0 204 playlist_items: Specific indices of playlist to download.
ff815fe6 205 playlistreverse: Download playlist items in reverse order.
75822ca7 206 playlistrandom: Download playlist items in random order.
8222d8de
JMF
207 matchtitle: Download only matching titles.
208 rejecttitle: Reject downloads for matching titles.
8bf9319e 209 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
210 logtostderr: Log messages to stderr instead of stdout.
211 writedescription: Write the video description to a .description file
212 writeinfojson: Write the video metadata (info_dict) to a .info.json file
06167fbb 213 writecomments: Extract video comments. This will not be written to disk
214 unless writeinfojson is also given
1fb07d10 215 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 216 writethumbnail: Write the thumbnail image to a file
c25228e5 217 allow_playlist_files: Whether to write playlists' description, infojson etc
218 also to disk when using the 'write*' options
ec82d85a 219 write_all_thumbnails: Write all thumbnail formats to files
732044af 220 writelink: Write an internet shortcut file, depending on the
221 current platform (.url/.webloc/.desktop)
222 writeurllink: Write a Windows internet shortcut file (.url)
223 writewebloclink: Write a macOS internet shortcut file (.webloc)
224 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 225 writesubtitles: Write the video subtitles to a file
741dd8ea 226 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 227 allsubtitles: Downloads all the subtitles of the video
0b7f3118 228 (requires writesubtitles or writeautomaticsub)
8222d8de 229 listsubtitles: Lists all available subtitles for the video
a504ced0 230 subtitlesformat: The format code for subtitles
aa6a10c4 231 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
232 keepvideo: Keep the video file after post-processing
233 daterange: A DateRange object, download only if the upload_date is in the range.
234 skip_download: Skip the actual download of the video file
c35f9e72 235 cachedir: Location of the cache files in the filesystem.
a0e07d31 236 False to disable filesystem cache.
47192f92 237 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
238 age_limit: An integer representing the user's age in years.
239 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
240 min_views: An integer representing the minimum view count the video
241 must have in order to not be skipped.
242 Videos without view count information are always
243 downloaded. None for no limit.
244 max_views: An integer representing the maximum view count.
245 Videos that are more popular than that are not
246 downloaded.
247 Videos without view count information are always
248 downloaded. None for no limit.
249 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
250 Videos already present in the file are not downloaded
251 again.
8a51f564 252 break_on_existing: Stop the download process after attempting to download a
253 file that is in the archive.
254 break_on_reject: Stop the download process when encountering a video that
255 has been filtered out.
256 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 257 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
258 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
259 At the moment, this is only supported by YouTube.
a1ee09e8 260 proxy: URL of the proxy server to use
38cce791 261 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 262 on geo-restricted sites.
e344693b 263 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
264 bidi_workaround: Work around buggy terminals without bidirectional text
265 support, using fribidi
a0ddb8a2 266 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 267 include_ads: Download ads as well
04b4d394
PH
268 default_search: Prepend this string if an input url is not valid.
269 'auto' for elaborate guessing
62fec3b2 270 encoding: Use this encoding instead of the system-specified.
e8ee972c 271 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
272 Pass in 'in_playlist' to only show this behavior for
273 playlist items.
4f026faf 274 postprocessors: A list of dictionaries, each with an entry
71b640cc 275 * key: The name of the postprocessor. See
cefecac1 276 youtube_dlc/postprocessor/__init__.py for a list.
0202b52a 277 * when: Optional. Which postprocessor chain to add this to:
278 'beforedl', 'normal' (default) or 'aftermove'
4f026faf
PH
279 as well as any further keyword arguments for the
280 postprocessor.
ab8e5e51
AM
281 post_hooks: A list of functions that get called as the final step
282 for each video file, after all postprocessors have been
283 called. The filename will be passed as the only argument.
71b640cc
PH
284 progress_hooks: A list of functions that get called on download
285 progress, with a dictionary with the entries
5cda4eda 286 * status: One of "downloading", "error", or "finished".
ee69b99a 287 Check this first and ignore unknown values.
71b640cc 288
5cda4eda 289 If status is one of "downloading", or "finished", the
ee69b99a
PH
290 following properties may also be present:
291 * filename: The final filename (always present)
5cda4eda 292 * tmpfilename: The filename we're currently writing to
71b640cc
PH
293 * downloaded_bytes: Bytes on disk
294 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
295 * total_bytes_estimate: Guess of the eventual file size,
296 None if unavailable.
297 * elapsed: The number of seconds since download started.
71b640cc
PH
298 * eta: The estimated time in seconds, None if unknown
299 * speed: The download speed in bytes/second, None if
300 unknown
5cda4eda
PH
301 * fragment_index: The counter of the currently
302 downloaded video fragment.
303 * fragment_count: The number of fragments (= individual
304 files that will be merged)
71b640cc
PH
305
306 Progress hooks are guaranteed to be called at least once
307 (with status "finished") if the download is successful.
45598f15 308 merge_output_format: Extension to use when merging formats.
6b591b29 309 final_ext: Expected final extension; used to detect when the file was
310 already downloaded and converted. "merge_output_format" is
311 replaced by this extension when given
6271f1ca
PH
312 fixup: Automatically correct known faults of the file.
313 One of:
314 - "never": do nothing
315 - "warn": only emit a warning
316 - "detect_or_warn": check whether we can do anything
62cd676c 317 about it, warn otherwise (default)
504f20dd 318 source_address: Client-side IP address to bind to.
6ec6cb4e 319 call_home: Boolean, true iff we are allowed to contact the
cefecac1 320 youtube-dlc servers for debugging.
7aa589a5
S
321 sleep_interval: Number of seconds to sleep before each download when
322 used alone or a lower bound of a range for randomized
323 sleep before each download (minimum possible number
324 of seconds to sleep) when used along with
325 max_sleep_interval.
326 max_sleep_interval:Upper bound of a range for randomized sleep before each
327 download (maximum possible number of seconds to sleep).
328 Must only be used along with sleep_interval.
329 Actual sleep time will be a random float from range
330 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
331 listformats: Print an overview of available video formats and exit.
332 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
333 match_filter: A function that gets called with the info_dict of
334 every video.
335 If it returns a message, the video is ignored.
336 If it returns None, the video is downloaded.
337 match_filter_func in utils.py is one example for this.
7e5db8c9 338 no_color: Do not emit color codes in output.
0a840f58 339 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 340 HTTP header
0a840f58 341 geo_bypass_country:
773f291d
S
342 Two-letter ISO 3166-2 country code that will be used for
343 explicit geographic restriction bypassing via faking
504f20dd 344 X-Forwarded-For HTTP header
5f95927a
S
345 geo_bypass_ip_block:
346 IP range in CIDR notation that will be used similarly to
504f20dd 347 geo_bypass_country
71b640cc 348
85729c51
PH
349 The following options determine which downloader is picked:
350 external_downloader: Executable of the external downloader to call.
351 None or unset for standard (built-in) downloader.
bf09af3a
S
352 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
353 if True, otherwise use ffmpeg/avconv if False, otherwise
354 use downloader suggested by extractor if None.
fe7e0c98 355
8222d8de 356 The following parameters are not used by YoutubeDL itself, they are used by
cefecac1 357 the downloader (see youtube_dlc/downloader/common.py):
8222d8de 358 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 359 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c
S
360 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
361 http_chunk_size.
76b1bd67
JMF
362
363 The following options are used by the post processors:
d4a24f40 364 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 365 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
366 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
367 to the binary or its containing directory.
43820c03 368 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
369 and a list of additional command-line arguments for the
370 postprocessor/executable. The dict can also have "PP+EXE" keys
371 which are used when the given exe is used by the given PP.
372 Use 'default' as the name for arguments to be passed to all PP
3600fd59
S
373 The following options are used by the Youtube extractor:
374 youtube_include_dash_manifest: If True (default), DASH manifests and related
375 data will be downloaded and processed by extractor.
376 You can reduce network I/O by disabling it if you don't
377 care about DASH.
8222d8de
JMF
378 """
379
c9969434
S
# Set of info-dict field names that this class classifies as numeric.
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
}

# Class-level defaults.  __init__ rebinds params, _ies, _pps,
# __prepare_filename_warned, _download_retcode, _num_downloads and
# _screen_file on every instance; _playlist_level and _playlist_urls are
# not reset there.
params = None
_ies = []
_pps = {'beforedl': [], 'aftermove': [], 'normal': []}
__prepare_filename_warned = False
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
400
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params    -- dictionary of options (see the class docstring);
                 None is treated as an empty dictionary.
    auto_init -- if true, print the debug header and register the
                 default info extractors.

    Raises whatever the archive preload or the bidi helper start-up
    raises, except for "file not found" errors, which are tolerated.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    self.__prepare_filename_warned = False
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # A plain truth test instead of indexing a list with the option value:
    # indexing raised TypeError for None and other non-integer values.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive(self):
        """Preload the archive, if any is specified.

        Returns True if the archive file was read, False if no archive
        is configured or the file does not exist.
        """
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is fine; anything else is not.
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn and return True if the deprecated parameter is set."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    if self.params.get('verbose'):
        self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

    preload_download_archive(self)

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is set too.
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the option being absent.
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Copy so the caller's definition keeps its 'key'/'when' entries.
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        when = pp_def.pop('when', 'normal')
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
530
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short video ID starting with a dash.

    An argument made of a dash followed by exactly ten ID characters is
    reported, together with a suggested command line that moves all such
    arguments behind a "--" separator.
    """
    # short YouTube ID starting with dash?
    dashed_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    ordinary_args, suspicious_args = [], []
    for arg in argv:
        if dashed_id.match(arg):
            suspicious_args.append(arg)
        else:
            ordinary_args.append(arg)
    if not suspicious_args:
        return
    correct_argv = ['youtube-dlc'] + ordinary_args + ['--'] + suspicious_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
546
8222d8de
JMF
def add_info_extractor(self, ie):
    """Add an InfoExtractor object (or class) to the end of the list.

    An instance, as opposed to a class, is also indexed by its ie_key()
    and has this object set as its downloader.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # Classes are only listed; there is no instance to register.
        return
    instance_key = ie.ie_key()
    self._ies_instances[instance_key] = ie
    ie.set_downloader(self)
8222d8de 553
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key.

    An instance that is already registered is reused; otherwise a new one
    is created, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
565
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register everything yielded by gen_extractor_classes(), in order,
    at the end of the extractor list.
    """
    extractor_classes = gen_extractor_classes()
    for extractor_class in extractor_classes:
        self.add_info_extractor(extractor_class)
572
def add_post_processor(self, pp, when='normal'):
    """Append a PostProcessor object to the chain named by `when`.

    `when` must be a key of self._pps; the postprocessor is then given
    this object as its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
577
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks."""
    registered = self._post_hooks
    registered.append(ph)
581
933605d7
JMF
def add_progress_hook(self, ph):
    """Register `ph` as a progress hook (currently only for the file downloader)."""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 585
def _bidi_workaround(self, message):
    """Run `message` through the bidi helper process, if one was started.

    Without an `_output_channel` attribute the message is returned
    unchanged.  Otherwise the message is written to the helper's stdin and
    as many lines as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode('utf-8'))
    # Strip the final character, i.e. the newline appended before sending.
    return ''.join(converted)[:-1]
1c088fa8 598
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the 'quiet' option."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
602
def _write_string(self, s, out=None):
    """Write `s` to `out` through write_string, passing the 'encoding' option."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
734f90bb 605
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    message     -- text to print.
    skip_eol    -- if true, do not append a newline.
    check_quiet -- if true, print nothing when the 'quiet' option is set.

    When a 'logger' option is configured the message goes to its debug()
    method instead and nothing is written to the screen file.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    # A truth test rather than list indexing by the flag, so that None or
    # any other non-integer value of skip_eol does not raise TypeError.
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
8222d8de
JMF
616
def to_stderr(self, message):
    """Print message to the error file, or pass it to the logger's error()."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
8222d8de 626
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to `message` if the 'consoletitle' option is on.

    On 'nt' the title is set through kernel32 when a console window exists;
    elsewhere an escape sequence is written to the screen file, but only
    if the TERM environment variable is set.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 637
bdde425c
PH
def save_console_title(self):
    """Ask the terminal to save its title on the title stack.

    Does nothing unless 'consoletitle' is set, 'simulate' is unset, the OS
    is not 'nt' and the TERM environment variable is present.
    """
    options = self.params
    if not options.get('consoletitle', False) or options.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
646
def restore_console_title(self):
    """Ask the terminal to restore its title from the title stack.

    Does nothing unless 'consoletitle' is set, 'simulate' is unset, the OS
    is not 'nt' and the TERM environment variable is present.
    """
    options = self.params
    if not options.get('consoletitle', False) or options.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
655
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
659
def __exit__(self, *args):
    """Leave the context: restore the console title, then save the cookies.

    The cookie jar is only saved when the 'cookiefile' option is not None.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 665
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr first; with the 'verbose'
    option a traceback is printed as well.  Then, unless the
    'ignoreerrors' option is set, DownloadError is raised; otherwise the
    download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if not active[0]:
                # Not called from an except block: show the call stack.
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
            else:
                # Called from an except block; an exception that carries
                # its own exc_info has that traceback printed first.
                tb = ''
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*active[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = sys.exc_info()
    if exc_info[0] and hasattr(exc_info[1], 'exc_info') and exc_info[1].exc_info[0]:
        # Prefer the exc_info carried by the active exception.
        exc_info = exc_info[1].exc_info
    raise DownloadError(message, exc_info)
695
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.

    A configured logger receives the bare message through warning()
    instead.  Nothing is printed when the 'no_warnings' option is set.
    The prefix is colored when colors are allowed, stderr is a tty and
    the OS is not 'nt'.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
712
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:'.

    The prefix is colored in red when colors are allowed, stderr is a tty
    and the OS is not 'nt'.
    '''
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
724
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    generic_notice = '[download] The file has already been downloaded'
    try:
        detailed_notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed_notice)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
8222d8de 731
def report_file_delete(self, file_name):
    """Tell the user that the existing file `file_name` will be deleted.

    Falls back to a message without the file name when printing the name
    raises UnicodeEncodeError.
    """
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
0c3d0f51 738
def parse_outtmpl(self):
    """Return the output templates as a dict keyed by template type.

    A non-dict 'outtmpl' param is treated as the 'default' template. Every
    type from DEFAULT_OUTTMPL that is missing or empty is filled in with its
    default. A warning is reported for each template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_type):
            templates[tmpl_type] = default_tmpl
    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
752
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Expand the output template of type `tmpl_type` for `info_dict`.

    Returns the resulting filename after sanitize_path(), or None when the
    expansion raises ValueError (the error is reported via report_error).
    """
    try:
        template_dict = dict(info_dict)

        template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-')
            if info_dict.get('duration', None) is not None
            else None)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        def sanitize(k, v):
            # 'id' and '*_id' fields are sanitized with is_id=True
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))

        # Numbers are kept as they are so numeric conversions keep working;
        # None values and containers are dropped, everything else is
        # sanitized as a string
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        na = self.params.get('outtmpl_na_placeholder', 'NA')
        template_dict = collections.defaultdict(lambda: na, template_dict)

        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        force_ext = OUTTMPL_TYPES.get(tmpl_type)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        # The width is looked up per occurrence: a template that uses both
        # fields must not pad one of them with the other's width
        outtmpl = re.sub(
            FIELD_SIZE_COMPAT_RE,
            lambda m: '%%(%s)0%dd' % (m.group('field'), field_size_compat_map[m.group('field')]),
            outtmpl)

        # As of [1] format syntax is:
        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            (?P<type>[diouxXeEfFgGcrs%])  # conversion type
        '''

        numeric_fields = list(self._NUMERIC_FIELDS)

        # Format date
        FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
        for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
            conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
            if key in template_dict:
                continue
            value = strftime_or_none(template_dict.get(field), frmt, na)
            if conv_type in 'crs':  # string
                value = sanitize(field, value)
            else:  # number
                numeric_fields.append(key)
                value = float_or_none(value, default=None)
            if value is not None:
                template_dict[key] = value

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string NA placeholder is returned for missing fields. We will patch
        # output template for missing fields to meet string presentation type.
        for numeric_field in numeric_fields:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(re.escape(numeric_field)),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        if force_ext is not None:
            filename = replace_extension(filename, force_ext, template_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most the last two dot-separated parts (sub-extension
            # and extension) and trim only what precedes them. An unlimited
            # split would drop any middle segments of a name containing dots
            # and would duplicate a name that has no dot at all.
            name_parts = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [name_parts[0][:trim_file_name]] + name_parts[1:]))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        filename = sanitize_path(filename)

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
880
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    The filename built by _prepare_filename() for `dir_type` (or 'default')
    is joined onto the 'home' path and the `dir_type` path from the 'paths'
    param. '-' (stdout) and empty/None filenames are returned unchanged.

    With `warn` set, a one-time warning is reported when 'paths' is given but
    cannot apply (output to stdout, or an absolute path in the template).
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    # filename is None when the template could not be expanded; there is
    # nothing to check then, and os.path.isabs(None) would raise
    if warn and not self.__prepare_filename_warned and filename:
        if not paths:
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout')
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    return sanitize_path(os.path.join(homepath, subdir, filename))
903
def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason for skipping it (also printed to screen).
    Raises ExistingVideoReached when the entry is already in the download
    archive and 'break_on_existing' is set, or RejectedVideoReached when the
    entry is skipped for any reason and 'break_on_reject' is set.
    """

    # Shared between the message and the check below so that the two cannot
    # drift apart (they used to differ, which disabled 'break_on_existing')
    ARCHIVE_REASON_SUFFIX = 'has already been recorded in archive'

    def check_filter():
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s %s' % (video_title, ARCHIVE_REASON_SUFFIX)

        # The user supplied match_filter is only consulted for complete entries
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    reason = check_filter()
    if reason is not None:
        self.to_screen('[download] ' + reason)
        if reason.endswith(ARCHIVE_REASON_SUFFIX) and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
    return reason
fe7e0c98 954
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
960
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Run the first suitable info extractor on url and return its result.

    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result.
    ie_key restricts the lookup to that extractor; force_generic_extractor
    selects the 'Generic' extractor when no ie_key is given.

    Returns None when the video id is already recorded in the download
    archive, or (after reporting an error) when no extractor is suitable.
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that archived videos can be
        # skipped without running the extractor
        try:
            id_getter = getattr(ie, 'extract_id', None)
            if not callable(id_getter):
                id_getter = ie._match_id
            temp_id = str_or_none(id_getter(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            return None
        return self.__extract_info(url, ie, download, extra_info, process, info_dict)

    # Only reached when no candidate was suitable for the URL
    self.report_error('no suitable InfoExtractor for URL %s' % url)
1000
def __handle_extraction_exceptions(func):
    """Decorator that reports extraction errors instead of propagating them.

    GeoRestrictedError and ExtractorError are passed to report_error().
    MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached are
    always re-raised. Any other exception is reported when the
    'ignoreerrors' param is set and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            parts = [e.msg]
            if e.countries:
                available_in = ', '.join(map(ISO3166Utils.short2full, e.countries))
                parts.append('\nThis video is available in %s.' % available_in)
            parts.append('\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error(''.join(parts))
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1022
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """Run extractor `ie` on `url` and optionally process its result.

    A truthy 'id' / 'title' from `info_dict` overrides the extracted value.
    Returns None when the extractor returned None, the processed result when
    `process` is set, and the raw extractor result otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    if info_dict:
        for field in ('id', 'title'):
            if info_dict.get(field):
                ie_result[field] = info_dict[field]
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1044
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields that ie_result is missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1052
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result. Returns None for a playlist whose
    webpage_url is already being processed (nested playlist recursion).
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them the id fallback is never used and 'None' gets printed
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            # Forget the visited URLs once the outermost playlist is done
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1153
def __process_playlist(self, ie_result, download):
    """Process every selected entry of the playlist result `ie_result`.

    Optionally writes the playlist level info JSON / description files,
    selects entries according to the 'playliststart', 'playlistend' and
    'playlist_items' params, applies 'playlistreverse' / 'playlistrandom'
    and passes every entry that is not rejected by _match_entry() on for
    processing.

    Returns ie_result with 'entries' replaced by the list of processed
    entries, or None when a playlist file could not be prepared or written.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0
        }
        ie_copy.update(dict(ie_result))

        def ensure_dir_exists(path):
            return make_dir(path, self.report_error)

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                playlist_info = dict(ie_result)
                # 'entries' may be a generator which should not be resolved
                # here, so it is left out of the written metadata
                del playlist_info['entries']
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(playlist_info), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    playlist_results = []

    playliststart = self.params.get('playliststart', 1) - 1
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # spec is a comma separated list of numbers and 'start-end' ranges
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']

    # When 'playlist_items' is used, entry_indices[k] is the requested item
    # number that produced entries[k]. It is tracked next to the entries
    # because requested items that do not exist are dropped, which makes a
    # positional lookup in playlistitems point at the wrong number.
    entry_indices = None

    def make_playlistitems_entries(list_ie_entries):
        num_entries = len(list_ie_entries)
        kept = [i for i in playlistitems if -num_entries <= i - 1 < num_entries]
        return [list_ie_entries[i - 1] for i in kept], kept

    def report_download(num_entries):
        self.to_screen(
            '[%s] playlist %s: Downloading %d videos' %
            (ie_result['extractor'], playlist, num_entries))

    if isinstance(ie_entries, list):
        n_all_entries = len(ie_entries)
        if playlistitems:
            entries, entry_indices = make_playlistitems_entries(ie_entries)
        else:
            entries = ie_entries[playliststart:playlistend]
        n_entries = len(entries)
        self.to_screen(
            '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
    elif isinstance(ie_entries, PagedList):
        if playlistitems:
            entries = []
            entry_indices = []
            for item in playlistitems:
                found = list(ie_entries.getslice(item - 1, item))
                entries.extend(found)
                entry_indices.extend([item] * len(found))
        else:
            entries = ie_entries.getslice(
                playliststart, playlistend)
        n_entries = len(entries)
        report_download(n_entries)
    else:  # iterable
        if playlistitems:
            entries, entry_indices = make_playlistitems_entries(list(itertools.islice(
                ie_entries, 0, max(playlistitems))))
        else:
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        report_download(n_entries)

    reverse_order = self.params.get('playlistreverse', False)
    random_order = self.params.get('playlistrandom', False)
    if entry_indices is None:
        # Without 'playlist_items' the index is the position in download order
        if reverse_order:
            entries = entries[::-1]
        if random_order:
            random.shuffle(entries)
        entry_indices = [pos + playliststart for pos in range(1, len(entries) + 1)]
    else:
        # Reorder entries together with their item numbers so that every
        # entry keeps the number it was requested by
        paired = list(zip(entries, entry_indices))
        if reverse_order:
            paired = paired[::-1]
        if random_order:
            random.shuffle(paired)
        entries = [entry for entry, _ in paired]
        entry_indices = [index for _, index in paired]

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    for i, (entry, playlist_index) in enumerate(zip(entries, entry_indices), 1):
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': playlist_index,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1309
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry through process_ie_result()."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1314
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """Returns a function to filter the formats according to the filter_spec

    Numeric comparisons on a fixed set of keys are tried first, then string
    comparisons (optionally negated with '!') on any key. A '?' after the
    operator makes the filter accept formats that lack the key.
    Raises ValueError when filter_spec matches neither form or its numeric
    value cannot be parsed.
    """

    NUMERIC_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, NUMERIC_OPERATORS.keys())))
    m = numeric_rex.search(filter_spec)
    if m:
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and without
            # an implied trailing 'B'
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = NUMERIC_OPERATORS[m.group('op')]
    else:
        STRING_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STRING_OPERATORS.keys())))
        m = string_rex.search(filter_spec)
        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)
        comparison_value = m.group('value')
        str_op = STRING_OPERATORS[m.group('op')]
        if m.group('negation'):
            def op(attr, value):
                return not str_op(attr, value)
        else:
            op = str_op

    key = m.group('key')
    none_inclusive = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            # Formats lacking the key pass only when the spec used '?'
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter
1377
def _default_format_spec(self, info_dict, download=True):
    """Return the format selector string used when none was requested.

    A single pre-merged file is preferred ('best/bestvideo+bestaudio') when
    actually downloading and either merging is unavailable, the video is
    live, or the default output template is '-'. Otherwise separate streams
    are preferred, with the exact selector depending on the
    'allow_multiple_audio_streams' param.
    """

    def prefers_single_file():
        if self.params.get('simulate', False) or not download:
            return False
        merger = FFmpegMergerPP(self)
        if not (merger.available and merger.can_merge()):
            return True
        return (info_dict.get('is_live', False)
                or self.outtmpl_dict['default'] == '-')

    if prefers_single_file():
        return 'best/bestvideo+bestaudio'
    if not self.params.get('allow_multiple_audio_streams', False):
        return 'bestvideo*+bestaudio/best'
    return 'bestvideo+bestaudio/best'
0017d9ad 1398
67134eab
JMF
1399 def build_format_selector(self, format_spec):
1400 def syntax_error(note, start):
1401 message = (
1402 'Invalid format specification: '
1403 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1404 return SyntaxError(message)
1405
1406 PICKFIRST = 'PICKFIRST'
1407 MERGE = 'MERGE'
1408 SINGLE = 'SINGLE'
0130afb7 1409 GROUP = 'GROUP'
67134eab
JMF
1410 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1411
91ebc640 1412 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1413 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1414
67134eab
JMF
1415 def _parse_filter(tokens):
1416 filter_parts = []
1417 for type, string, start, _, _ in tokens:
1418 if type == tokenize.OP and string == ']':
1419 return ''.join(filter_parts)
1420 else:
1421 filter_parts.append(string)
1422
232541df 1423 def _remove_unused_ops(tokens):
17cc1534 1424 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1425 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1426 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1427 last_string, last_start, last_end, last_line = None, None, None, None
1428 for type, string, start, end, line in tokens:
1429 if type == tokenize.OP and string == '[':
1430 if last_string:
1431 yield tokenize.NAME, last_string, last_start, last_end, last_line
1432 last_string = None
1433 yield type, string, start, end, line
1434 # everything inside brackets will be handled by _parse_filter
1435 for type, string, start, end, line in tokens:
1436 yield type, string, start, end, line
1437 if type == tokenize.OP and string == ']':
1438 break
1439 elif type == tokenize.OP and string in ALLOWED_OPS:
1440 if last_string:
1441 yield tokenize.NAME, last_string, last_start, last_end, last_line
1442 last_string = None
1443 yield type, string, start, end, line
1444 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1445 if not last_string:
1446 last_string = string
1447 last_start = start
1448 last_end = end
1449 else:
1450 last_string += string
1451 if last_string:
1452 yield tokenize.NAME, last_string, last_start, last_end, last_line
1453
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
    """Turn a stream of tokens into a list of FormatSelector tuples.

    `tokens` must be an iterator that also offers restore_last_token(), so
    that a delimiter belonging to an outer construct can be pushed back
    before returning to the caller.

    The inside_* flags tell the parser which construct the recursive call
    is nested in and therefore which operators terminate it:
      inside_merge  - stops at '/' and ',' (right-hand side of '+')
      inside_choice - stops at ','         (right-hand side of '/')
      inside_group  - consumes the closing ')' instead of pushing it back

    Returns the list of parsed selectors (possibly empty).
    Raises whatever syntax_error() builds for malformed expressions.
    """
    parsed = []
    pending = None  # selector currently being assembled
    # ENCODING is only defined in python 3.x
    encoding_type = getattr(tokenize, 'ENCODING', None)

    for tok_type, tok_text, tok_start, _, _ in tokens:
        if tok_type == encoding_type:
            continue
        if tok_type == tokenize.ENDMARKER:
            break
        if tok_type in (tokenize.NAME, tokenize.NUMBER):
            pending = FormatSelector(SINGLE, tok_text, [])
            continue
        if tok_type != tokenize.OP:
            continue

        # --- operators that terminate the current (sub)expression ---
        if tok_text == ')':
            if not inside_group:
                # ')' will be handled by the parentheses group
                tokens.restore_last_token()
            break
        closes_merge = inside_merge and tok_text in ('/', ',')
        closes_choice = inside_choice and tok_text == ','
        if closes_merge or closes_choice:
            tokens.restore_last_token()
            break

        # --- operators that extend the current (sub)expression ---
        if tok_text == ',':
            if not pending:
                raise syntax_error('"," must follow a format selector', tok_start)
            parsed.append(pending)
            pending = None
        elif tok_text == '/':
            if not pending:
                raise syntax_error('"/" must follow a format selector', tok_start)
            preferred = pending
            alternative = _parse_format_selection(tokens, inside_choice=True)
            pending = FormatSelector(PICKFIRST, (preferred, alternative), [])
        elif tok_text == '[':
            if not pending:
                # A bare filter applies to the implicit 'best' selector
                pending = FormatSelector(SINGLE, 'best', [])
            pending.filters.append(_parse_filter(tokens))
        elif tok_text == '(':
            if pending:
                raise syntax_error('Unexpected "("', tok_start)
            grouped = _parse_format_selection(tokens, inside_group=True)
            pending = FormatSelector(GROUP, grouped, [])
        elif tok_text == '+':
            if not pending:
                raise syntax_error('Unexpected "+"', tok_start)
            left_side = pending
            right_side = _parse_format_selection(tokens, inside_merge=True)
            if not right_side:
                raise syntax_error('Expected a selector', tok_start)
            pending = FormatSelector(MERGE, (left_side, right_side), [])
        else:
            raise syntax_error('Operator not recognized: "{0}"'.format(tok_text), tok_start)

    if pending:
        parsed.append(pending)
    return parsed
1511
def _build_selector_function(selector):
    """Compile a parsed selector into a callable that picks formats.

    `selector` is either a list of selectors (the ',' operator) or a single
    FormatSelector whose `type` is GROUP, PICKFIRST, SINGLE or MERGE.

    The returned callable takes a context dict (it reads ctx['formats'] and,
    for the best/worst fallback, ctx['incomplete_formats']) and returns an
    iterable of the selected format dicts.  For a non-list selector the
    callable first applies the selector's own filters to a deep copy of
    the context.
    """
    if isinstance(selector, list):  # ,
        fs = [_build_selector_function(s) for s in selector]

        def selector_function(ctx):
            for f in fs:
                for format in f(ctx):
                    yield format
        # A list carries no filters of its own, so no wrapping is needed
        return selector_function

    elif selector.type == GROUP:  # ()
        selector_function = _build_selector_function(selector.selector)

    elif selector.type == PICKFIRST:  # /
        fs = [_build_selector_function(s) for s in selector.selector]

        def selector_function(ctx):
            # The first alternative that yields anything wins
            for f in fs:
                picked_formats = list(f(ctx))
                if picked_formats:
                    return picked_formats
            return []

    elif selector.type == SINGLE:  # atom
        format_spec = selector.selector if selector.selector is not None else 'best'

        if format_spec == 'all':
            def selector_function(ctx):
                formats = list(ctx['formats'])
                if formats:
                    for f in formats:
                        yield f

        else:
            format_fallback = False
            format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
            if format_spec_obj is not None:
                # 'worst' picks the first candidate, 'best' the last one
                format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                not_format_type = 'v' if format_type == 'a' else 'a'
                format_modified = format_spec_obj.group(3) is not None

                format_fallback = not format_type and not format_modified  # for b, w
                filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get(not_format_type + 'codec') == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else None)  # b*, w*
            else:
                format_idx = -1
                filter_f = ((lambda f: f.get('ext') == format_spec)
                            if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                            else (lambda f: f.get('format_id') == format_spec))  # id

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if not formats:
                    return
                matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                if matches:
                    yield matches[format_idx]
                elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) best/worst will fallback to
                    # best/worst {video,audio}-only format
                    yield formats[format_idx]

    elif selector.type == MERGE:  # +
        def _merge(formats_pair):
            """Combine the two selected formats into one merged format dict."""
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Keep only the first format carrying each stream kind that
                # must not be duplicated.  A new list is built rather than
                # popping from the list being enumerated: popping skipped the
                # following element and, for a format with both a restricted
                # audio and video stream, popped twice (dropping an unrelated
                # format or raising IndexError).
                stream_taken = {"video": False, "audio": False}
                kept_formats = []
                for fmt_info in formats_info:
                    restricted_streams = [
                        aud_vid for aud_vid in ("audio", "video")
                        if not allow_multiple_streams[aud_vid]
                        and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                    if any(stream_taken[aud_vid] for aud_vid in restricted_streams):
                        continue
                    for aud_vid in restricted_streams:
                        stream_taken[aud_vid] = True
                    kept_formats.append(fmt_info)
                formats_info = kept_formats

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution'),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        selector_1, selector_2 = map(_build_selector_function, selector.selector)

        def selector_function(ctx):
            # Each side gets its own deep copy of the context
            for pair in itertools.product(
                    selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                yield _merge(pair)

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(ctx):
        # Filter a copy so the caller's context is left untouched
        ctx_copy = copy.deepcopy(ctx)
        for _filter in filters:
            ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
        return selector_function(ctx_copy)
    return final_selector
083c9df9 1657
67134eab 1658 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1659 try:
232541df 1660 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1661 except tokenize.TokenError:
1662 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1663
class TokenIterator(object):
    """Iterator over a token sequence that can step back by one token.

    Works with both the Python 3 (`__next__`) and the Python 2 (`next`)
    iterator protocols.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.counter = 0  # index of the token handed out next

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position < len(self.tokens):
            token = self.tokens[position]
            self.counter = position + 1
            return token
        raise StopIteration()

    # Python 2 spelling of the iterator protocol
    next = __next__

    def restore_last_token(self):
        """Rewind by one position so the last token is returned again."""
        self.counter -= 1
1683
1684 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1685 return _build_selector_function(parsed_selector)
a9c58ad9 1686
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP header dict for the given info dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    when present, sets 'Cookie' when _calc_cookies() returns a value, and
    adds 'X-Forwarded-For' from info_dict['__x_forwarded_for_ip'] unless that
    header is already set.  Returns the resulting dict.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly supplied X-Forwarded-For header takes precedence
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1704
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar sets for info_dict['url'].

    A throwaway request for the URL is built and handed to the cookie jar;
    the header it adds (if any) is read back from that request.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 1709
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalise a single video result, select its formats and process them.

    The info dict is sanitised in place (id, numeric fields, thumbnails,
    upload date, chapter/season/episode titles, subtitle and format
    entries), the requested subtitles and formats are selected, and, when
    `download` is true, every selected format is passed to process_info().

    Returns the updated info_dict, or None when one of the listing options
    ('list_thumbnails', 'listsubtitles', 'listformats') is set.

    Raises ExtractorError when 'id' or 'title' is missing, when there is no
    usable format, or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Ascending order, so the last entry is the preferred thumbnail
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    if not formats:
        # Every candidate was rejected by the filter above.  Fail with the
        # same extractor error as for an empty list instead of letting the
        # formats[0] access further down raise IndexError.
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if not fmt.get('format_id'):
            fmt['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_screen('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1947
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    `normal_subtitles` and `automatic_captions` map a language code to a
    list of subtitle format dicts (each carrying at least 'ext').  Regular
    subtitles are considered when the 'writesubtitles' param is set and
    automatic captions when 'writeautomaticsub' is set; regular subtitles
    win when a language is present in both.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    Languages that are missing or have no formats are skipped with a
    warning.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not
            available_subs):
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    else:
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing language; without
        # this guard formats[-1] below would raise IndexError.
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions matched: fall back to the
            # last format of the list
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
1996
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout every field whose 'force<field>' param is enabled.

    Output order: title, id, url(s), thumbnail, description, filename,
    duration, format, and finally the whole info dict as JSON.  When
    `incomplete` is true, URLs are not printed and title/id/format are
    printed only if they are present.
    """
    def forced(name):
        return self.params.get('force%s' % name, False)

    def emit(field, mandatory):
        # Mandatory fields of a complete result are printed unconditionally;
        # everything else only when a value is actually there.
        if not forced(field):
            return
        if info_dict.get(field) is not None or (mandatory and not incomplete):
            self.to_stdout(info_dict[field])

    emit('title', True)
    emit('id', True)

    if forced('url') and not incomplete:
        requested = info_dict.get('requested_formats')
        sources = requested if requested is not None else [info_dict]
        for source in sources:
            # For RTMP URLs, also include the playpath
            self.to_stdout(source['url'] + source.get('play_path', ''))

    emit('thumbnail', False)
    emit('description', False)

    if forced('filename') and filename is not None:
        self.to_stdout(filename)

    if forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    emit('format', True)

    if forced('json'):
        self.to_stdout(json.dumps(info_dict))
2026
8222d8de
JMF
2027 def process_info(self, info_dict):
2028 """Process a single resolved IE result."""
2029
2030 assert info_dict.get('_type', 'video') == 'video'
fd288278 2031
0202b52a 2032 info_dict.setdefault('__postprocessors', [])
2033
fd288278
PH
2034 max_downloads = self.params.get('max_downloads')
2035 if max_downloads is not None:
2036 if self._num_downloads >= int(max_downloads):
2037 raise MaxDownloadsReached()
8222d8de 2038
d06daf23 2039 # TODO: backward compatibility, to be removed
8222d8de 2040 info_dict['fulltitle'] = info_dict['title']
8222d8de 2041
11b85ce6 2042 if 'format' not in info_dict:
8222d8de
JMF
2043 info_dict['format'] = info_dict['ext']
2044
8b0d7497 2045 if self._match_entry(info_dict, incomplete=False) is not None:
8222d8de
JMF
2046 return
2047
fd288278 2048 self._num_downloads += 1
8222d8de 2049
5bfa4862 2050 info_dict = self.pre_process(info_dict)
2051
de6000d9 2052 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2053 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2054 files_to_move = {}
de6000d9 2055 skip_dl = self.params.get('skip_download', False)
8222d8de
JMF
2056
2057 # Forced printings
0202b52a 2058 self.__forced_printings(info_dict, full_filename, incomplete=False)
8222d8de 2059
8222d8de 2060 if self.params.get('simulate', False):
2d30509f 2061 if self.params.get('force_write_download_archive', False):
2062 self.record_download_archive(info_dict)
2063
2064 # Do nothing else if in simulate mode
8222d8de
JMF
2065 return
2066
de6000d9 2067 if full_filename is None:
8222d8de
JMF
2068 return
2069
c5c9bf0c 2070 def ensure_dir_exists(path):
0202b52a 2071 return make_dir(path, self.report_error)
c5c9bf0c 2072
0202b52a 2073 if not ensure_dir_exists(encodeFilename(full_filename)):
2074 return
2075 if not ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2076 return
2077
2078 if self.params.get('writedescription', False):
de6000d9 2079 descfn = self.prepare_filename(info_dict, 'description')
0202b52a 2080 if not ensure_dir_exists(encodeFilename(descfn)):
2081 return
0c3d0f51 2082 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2083 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2084 elif info_dict.get('description') is None:
2085 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2086 else:
2087 try:
6febd1c1 2088 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2089 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2090 descfile.write(info_dict['description'])
7b6fefc9 2091 except (OSError, IOError):
6febd1c1 2092 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2093 return
8222d8de 2094
1fb07d10 2095 if self.params.get('writeannotations', False):
de6000d9 2096 annofn = self.prepare_filename(info_dict, 'annotation')
0202b52a 2097 if not ensure_dir_exists(encodeFilename(annofn)):
2098 return
0c3d0f51 2099 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2100 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2101 elif not info_dict.get('annotations'):
2102 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2103 else:
2104 try:
6febd1c1 2105 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2106 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2107 annofile.write(info_dict['annotations'])
2108 except (KeyError, TypeError):
6febd1c1 2109 self.report_warning('There are no annotations to write.')
7b6fefc9 2110 except (OSError, IOError):
6febd1c1 2111 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2112 return
1fb07d10 2113
9f448fcb 2114 def dl(name, info, subtitle=False):
98b69821 2115 fd = get_suitable_downloader(info, self.params)(self, self.params)
2116 for ph in self._progress_hooks:
2117 fd.add_progress_hook(ph)
2118 if self.params.get('verbose'):
29f7c58a 2119 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
9f448fcb 2120 return fd.download(name, info, subtitle)
98b69821 2121
c4a91be7 2122 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2123 self.params.get('writeautomaticsub')])
c4a91be7 2124
c84dd8a9 2125 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2126 # subtitles download errors are already managed as troubles in relevant IE
2127 # that way it will silently go on when used with unsupporting IE
c84dd8a9 2128 subtitles = info_dict['requested_subtitles']
fa57af1e 2129 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2130 for sub_lang, sub_info in subtitles.items():
2131 sub_format = sub_info['ext']
de6000d9 2132 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2133 sub_filename = subtitles_filename(
0fd1a2b0 2134 temp_filename if not skip_dl else sub_fn,
0202b52a 2135 sub_lang, sub_format, info_dict.get('ext'))
de6000d9 2136 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2137 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2138 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
0202b52a 2139 files_to_move[sub_filename] = sub_filename_final
a504ced0 2140 else:
0c9df79e 2141 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2142 if sub_info.get('data') is not None:
2143 try:
2144 # Use newline='' to prevent conversion of newline characters
067aa17e 2145 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2146 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2147 subfile.write(sub_info['data'])
0202b52a 2148 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2149 except (OSError, IOError):
2150 self.report_error('Cannot write subtitles file ' + sub_filename)
2151 return
7b6fefc9 2152 else:
5ff1bc0c 2153 try:
9f448fcb
U
2154 dl(sub_filename, sub_info, subtitle=True)
2155 '''
0c9df79e
U
2156 if self.params.get('sleep_interval_subtitles', False):
2157 dl(sub_filename, sub_info)
2158 else:
2159 sub_data = ie._request_webpage(
2160 sub_info['url'], info_dict['id'], note=False).read()
2161 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2162 subfile.write(sub_data)
9f448fcb 2163 '''
0202b52a 2164 files_to_move[sub_filename] = sub_filename_final
0c9df79e 2165 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
5ff1bc0c
RA
2166 self.report_warning('Unable to download subtitle for "%s": %s' %
2167 (sub_lang, error_to_compat_str(err)))
2168 continue
8222d8de 2169
de6000d9 2170 if skip_dl:
57df9f53 2171 if self.params.get('convertsubtitles', False):
0202b52a 2172 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
de6000d9 2173 filename_real_ext = os.path.splitext(full_filename)[1][1:]
57df9f53 2174 filename_wo_ext = (
0202b52a 2175 os.path.splitext(full_filename)[0]
57df9f53 2176 if filename_real_ext == info_dict['ext']
0202b52a 2177 else full_filename)
57df9f53 2178 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
0202b52a 2179 # if subconv.available:
2180 # info_dict['__postprocessors'].append(subconv)
57df9f53 2181 if os.path.exists(encodeFilename(afilename)):
f791b419
U
2182 self.to_screen(
2183 '[download] %s has already been downloaded and '
2184 'converted' % afilename)
57df9f53
U
2185 else:
2186 try:
0202b52a 2187 self.post_process(full_filename, info_dict, files_to_move)
af819c21 2188 except PostProcessingError as err:
2189 self.report_error('Postprocessing: %s' % str(err))
57df9f53
U
2190 return
2191
8222d8de 2192 if self.params.get('writeinfojson', False):
de6000d9 2193 infofn = self.prepare_filename(info_dict, 'infojson')
0202b52a 2194 if not ensure_dir_exists(encodeFilename(infofn)):
2195 return
0c3d0f51 2196 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2197 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2198 else:
66c935fb 2199 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2200 try:
cb202fd2 2201 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 2202 except (OSError, IOError):
66c935fb 2203 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2204 return
de6000d9 2205 info_dict['__infojson_filename'] = infofn
8222d8de 2206
de6000d9 2207 thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2208 thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2209 for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2210 thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2211 thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2212 files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
8222d8de 2213
732044af 2214 # Write internet shortcut files
2215 url_link = webloc_link = desktop_link = False
2216 if self.params.get('writelink', False):
2217 if sys.platform == "darwin": # macOS.
2218 webloc_link = True
2219 elif sys.platform.startswith("linux"):
2220 desktop_link = True
2221 else: # if sys.platform in ['win32', 'cygwin']:
2222 url_link = True
2223 if self.params.get('writeurllink', False):
2224 url_link = True
2225 if self.params.get('writewebloclink', False):
2226 webloc_link = True
2227 if self.params.get('writedesktoplink', False):
2228 desktop_link = True
2229
2230 if url_link or webloc_link or desktop_link:
2231 if 'webpage_url' not in info_dict:
2232 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2233 return
2234 ascii_url = iri_to_uri(info_dict['webpage_url'])
2235
2236 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2237 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2238 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2239 self.to_screen('[info] Internet shortcut is already present')
2240 else:
2241 try:
2242 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2243 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2244 template_vars = {'url': ascii_url}
2245 if embed_filename:
2246 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2247 linkfile.write(template % template_vars)
2248 except (OSError, IOError):
2249 self.report_error('Cannot write internet shortcut ' + linkfn)
2250 return False
2251 return True
2252
2253 if url_link:
2254 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2255 return
2256 if webloc_link:
2257 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2258 return
2259 if desktop_link:
2260 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2261 return
2262
2263 # Download
2264 must_record_download_archive = False
de6000d9 2265 if not skip_dl:
4340deca 2266 try:
0202b52a 2267
6b591b29 2268 def existing_file(*filepaths):
2269 ext = info_dict.get('ext')
2270 final_ext = self.params.get('final_ext', ext)
2271 existing_files = []
2272 for file in orderedSet(filepaths):
2273 if final_ext != ext:
2274 converted = replace_extension(file, final_ext, ext)
2275 if os.path.exists(encodeFilename(converted)):
2276 existing_files.append(converted)
2277 if os.path.exists(encodeFilename(file)):
2278 existing_files.append(file)
2279
2280 if not existing_files or self.params.get('overwrites', False):
2281 for file in orderedSet(existing_files):
2282 self.report_file_delete(file)
2283 os.remove(encodeFilename(file))
2284 return None
2285
2286 self.report_file_already_downloaded(existing_files[0])
2287 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2288 return existing_files[0]
0202b52a 2289
2290 success = True
4340deca
P
2291 if info_dict.get('requested_formats') is not None:
2292 downloaded = []
d47aeb22 2293 merger = FFmpegMergerPP(self)
f740fae2 2294 if not merger.available:
4340deca 2295 self.report_warning('You have requested multiple '
e4172ac9 2296 'formats but ffmpeg is not installed.'
4a5a898a 2297 ' The formats won\'t be merged.')
81cd954a
S
2298
2299 def compatible_formats(formats):
d03cfdce 2300 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2301 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2302 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2303 if len(video_formats) > 2 or len(audio_formats) > 2:
2304 return False
2305
81cd954a 2306 # Check extension
d03cfdce 2307 exts = set(format.get('ext') for format in formats)
2308 COMPATIBLE_EXTS = (
2309 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2310 set(('webm',)),
2311 )
2312 for ext_sets in COMPATIBLE_EXTS:
2313 if ext_sets.issuperset(exts):
2314 return True
81cd954a
S
2315 # TODO: Check acodec/vcodec
2316 return False
2317
2318 requested_formats = info_dict['requested_formats']
0202b52a 2319 old_ext = info_dict['ext']
c0dea0a7 2320 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 2321 info_dict['ext'] = 'mkv'
4a5a898a
S
2322 self.report_warning(
2323 'Requested formats are incompatible for merge and will be merged into mkv.')
0202b52a 2324
2325 def correct_ext(filename):
2326 filename_real_ext = os.path.splitext(filename)[1][1:]
2327 filename_wo_ext = (
2328 os.path.splitext(filename)[0]
2329 if filename_real_ext == old_ext
2330 else filename)
2331 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2332
38c6902b 2333 # Ensure filename always has a correct extension for successful merge
0202b52a 2334 full_filename = correct_ext(full_filename)
2335 temp_filename = correct_ext(temp_filename)
2336 dl_filename = existing_file(full_filename, temp_filename)
2337 if dl_filename is None:
81cd954a 2338 for f in requested_formats:
5b5fbc08
JMF
2339 new_info = dict(info_dict)
2340 new_info.update(f)
c5c9bf0c 2341 fname = prepend_extension(
de6000d9 2342 self.prepare_filename(new_info, 'temp'),
c5c9bf0c
S
2343 'f%s' % f['format_id'], new_info['ext'])
2344 if not ensure_dir_exists(fname):
2345 return
5b5fbc08 2346 downloaded.append(fname)
a9e7f546 2347 partial_success, real_download = dl(fname, new_info)
5b5fbc08 2348 success = success and partial_success
efabc161 2349 if merger.available:
2350 info_dict['__postprocessors'].append(merger)
5b5fbc08 2351 info_dict['__files_to_merge'] = downloaded
a9e7f546 2352 # Even if there were no downloads, it is being merged only now
2353 info_dict['__real_download'] = True
4340deca
P
2354 else:
2355 # Just a single file
0202b52a 2356 dl_filename = existing_file(full_filename, temp_filename)
2357 if dl_filename is None:
2358 success, real_download = dl(temp_filename, info_dict)
2359 info_dict['__real_download'] = real_download
2360
0202b52a 2361 dl_filename = dl_filename or temp_filename
c571435f 2362 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2363
4340deca 2364 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
7960b056 2365 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2366 return
2367 except (OSError, IOError) as err:
2368 raise UnavailableVideoError(err)
2369 except (ContentTooShortError, ) as err:
2370 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2371 return
8222d8de 2372
de6000d9 2373 if success and full_filename != '-':
6271f1ca 2374 # Fixup content
62cd676c
PH
2375 fixup_policy = self.params.get('fixup')
2376 if fixup_policy is None:
2377 fixup_policy = 'detect_or_warn'
2378
e4172ac9 2379 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
d1e4a464 2380
6271f1ca
PH
2381 stretched_ratio = info_dict.get('stretched_ratio')
2382 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
2383 if fixup_policy == 'warn':
2384 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2385 info_dict['id'], stretched_ratio))
2386 elif fixup_policy == 'detect_or_warn':
2387 stretched_pp = FFmpegFixupStretchedPP(self)
2388 if stretched_pp.available:
6271f1ca
PH
2389 info_dict['__postprocessors'].append(stretched_pp)
2390 else:
2391 self.report_warning(
d1e4a464
S
2392 '%s: Non-uniform pixel ratio (%s). %s'
2393 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 2394 else:
62cd676c
PH
2395 assert fixup_policy in ('ignore', 'never')
2396
3089bc74 2397 if (info_dict.get('requested_formats') is None
6b591b29 2398 and info_dict.get('container') == 'm4a_dash'
2399 and info_dict.get('ext') == 'm4a'):
62cd676c 2400 if fixup_policy == 'warn':
d1e4a464
S
2401 self.report_warning(
2402 '%s: writing DASH m4a. '
2403 'Only some players support this container.'
2404 % info_dict['id'])
62cd676c
PH
2405 elif fixup_policy == 'detect_or_warn':
2406 fixup_pp = FFmpegFixupM4aPP(self)
2407 if fixup_pp.available:
62cd676c
PH
2408 info_dict['__postprocessors'].append(fixup_pp)
2409 else:
2410 self.report_warning(
d1e4a464
S
2411 '%s: writing DASH m4a. '
2412 'Only some players support this container. %s'
2413 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
2414 else:
2415 assert fixup_policy in ('ignore', 'never')
6271f1ca 2416
3089bc74
S
2417 if (info_dict.get('protocol') == 'm3u8_native'
2418 or info_dict.get('protocol') == 'm3u8'
2419 and self.params.get('hls_prefer_native')):
f17f8651 2420 if fixup_policy == 'warn':
a02682fd 2421 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 2422 info_dict['id']))
2423 elif fixup_policy == 'detect_or_warn':
2424 fixup_pp = FFmpegFixupM3u8PP(self)
2425 if fixup_pp.available:
f17f8651 2426 info_dict['__postprocessors'].append(fixup_pp)
2427 else:
2428 self.report_warning(
a02682fd 2429 '%s: malformed AAC bitstream detected. %s'
d1e4a464 2430 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 2431 else:
2432 assert fixup_policy in ('ignore', 'never')
2433
8222d8de 2434 try:
0202b52a 2435 self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2436 except PostProcessingError as err:
2437 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2438 return
ab8e5e51
AM
2439 try:
2440 for ph in self._post_hooks:
0202b52a 2441 ph(full_filename)
ab8e5e51
AM
2442 except Exception as err:
2443 self.report_error('post hooks: %s' % str(err))
2444 return
2d30509f 2445 must_record_download_archive = True
2446
2447 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2448 self.record_download_archive(info_dict)
c3e6ffba 2449 max_downloads = self.params.get('max_downloads')
2450 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2451 raise MaxDownloadsReached()
8222d8de
JMF
2452
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through an
    output template that is neither '-' nor contains a '%' field, unless
    'max_downloads' is 1. Returns self._download_retcode.
    """
    outtmpl = self.outtmpl_dict['default']
    if len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            # Report and carry on with the next URL
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(res))

    return self._download_retcode
2483
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file.

    The file is read as UTF-8, parsed as JSON, passed through
    self.filter_requested_info and handed to self.process_ie_result with
    download=True. If that raises DownloadError and the info has a
    'webpage_url', the URL is downloaded from scratch instead.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2500
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a copy of info_dict without private or per-request fields.

    Keys starting with '_' are dropped (except '_type'), as are
    'requested_formats' and 'requested_subtitles'.
    """
    fields_to_remove = ('requested_formats', 'requested_subtitles')
    kept = {}
    for key, value in info_dict.items():
        is_private = key[0] == '_' and key != '_type'
        if is_private or key in fields_to_remove:
            continue
        kept[key] = value
    return kept
cb202fd2 2507
def run_pp(self, pp, infodict, files_to_move=None):
    """Run a single postprocessor and handle the files it wants removed.

    pp.run(infodict) must return a (files_to_delete, infodict) pair.
    With the 'keepvideo' param set, each file in files_to_delete is
    registered in files_to_move (mapped to '' unless already present);
    otherwise the files are removed from disk and dropped from
    files_to_move.

    files_to_move is updated in place when the caller supplies it. When it
    is omitted a fresh dict is used for this call, so entries can no
    longer leak from one call to the next through a shared default.

    Returns the (files_to_move, infodict) pair.
    """
    if files_to_move is None:
        files_to_move = {}

    files_to_delete, infodict = pp.run(infodict)
    if not files_to_delete:
        return files_to_move, infodict

    if self.params.get('keepvideo', False):
        for f in files_to_delete:
            files_to_move.setdefault(f, '')
    else:
        for old_filename in set(files_to_delete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                # Best effort: a leftover file is only worth a warning
                self.report_warning('Unable to remove downloaded original file')
            if old_filename in files_to_move:
                del files_to_move[old_filename]
    return files_to_move, infodict
2527
def pre_process(self, ie_info):
    """Run every 'beforedl' postprocessor on a copy of ie_info and return the result."""
    info = dict(ie_info)
    for pp in self._pps['beforedl']:
        _, info = self.run_pp(pp, info)
    return info
2533
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    The postprocessors listed under ie_info['__postprocessors'] run first,
    followed by self._pps['normal'], then MoveFilesAfterDownloadPP with the
    accumulated files_to_move, and finally self._pps['aftermove'].

    files_to_move defaults to a fresh dict per call; it used to be a shared
    mutable default that run_pp could modify, carrying entries over into
    later calls.
    """
    if files_to_move is None:
        files_to_move = {}

    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = {}

    for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
        files_to_move, info = self.run_pp(pp, info, files_to_move)
    info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
    for pp in self._pps['aftermove']:
        info = self.run_pp(pp, info, {})[1]
c1c9a79c 2545
5db07df6 2546 def _make_archive_id(self, info_dict):
e9fef7ee
S
2547 video_id = info_dict.get('id')
2548 if not video_id:
2549 return
5db07df6
PH
2550 # Future-proof against any change in case
2551 # and backwards compatibility with prior versions
e9fef7ee 2552 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2553 if extractor is None:
1211bb6d
S
2554 url = str_or_none(info_dict.get('url'))
2555 if not url:
2556 return
e9fef7ee
S
2557 # Try to find matching extractor for the URL and take its ie_key
2558 for ie in self._ies:
1211bb6d 2559 if ie.suitable(url):
e9fef7ee
S
2560 extractor = ie.ie_key()
2561 break
2562 else:
2563 return
d0757229 2564 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2565
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is present in self.archive.

    Returns False when no 'download_archive' param is set or when no
    archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    # A falsy id means incomplete video information
    return bool(vid_id) and vid_id in self.archive
c1c9a79c
PH
2576
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the archive file and to self.archive.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    entry = vid_id + '\n'
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
    self.archive.add(vid_id)
dd82ffea 2586
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution string for a format dict.

    'audio only' when vcodec is 'none'; otherwise the explicit 'resolution'
    field, then 'WxH', 'Hp' or 'Wx?' depending on which of width/height are
    known, and finally `default`.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        return '%dx?' % width
    return default
2603
c57f7757
PH
2604 def _format_note(self, fdict):
2605 res = ''
2606 if fdict.get('ext') in ['f4f', 'f4m']:
2607 res += '(unsupported) '
32f90364
PH
2608 if fdict.get('language'):
2609 if res:
2610 res += ' '
9016d76f 2611 res += '[%s] ' % fdict['language']
c57f7757
PH
2612 if fdict.get('format_note') is not None:
2613 res += fdict['format_note'] + ' '
2614 if fdict.get('tbr') is not None:
2615 res += '%4dk ' % fdict['tbr']
2616 if fdict.get('container') is not None:
2617 if res:
2618 res += ', '
2619 res += '%s container' % fdict['container']
3089bc74
S
2620 if (fdict.get('vcodec') is not None
2621 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2622 if res:
2623 res += ', '
2624 res += fdict['vcodec']
91c7271a 2625 if fdict.get('vbr') is not None:
c57f7757
PH
2626 res += '@'
2627 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2628 res += 'video@'
2629 if fdict.get('vbr') is not None:
2630 res += '%4dk' % fdict['vbr']
fbb21cf5 2631 if fdict.get('fps') is not None:
5d583bdf
S
2632 if res:
2633 res += ', '
2634 res += '%sfps' % fdict['fps']
c57f7757
PH
2635 if fdict.get('acodec') is not None:
2636 if res:
2637 res += ', '
2638 if fdict['acodec'] == 'none':
2639 res += 'video only'
2640 else:
2641 res += '%-5s' % fdict['acodec']
2642 elif fdict.get('abr') is not None:
2643 if res:
2644 res += ', '
2645 res += 'audio'
2646 if fdict.get('abr') is not None:
2647 res += '@%3dk' % fdict['abr']
2648 if fdict.get('asr') is not None:
2649 res += ' (%5dHz)' % fdict['asr']
2650 if fdict.get('filesize') is not None:
2651 if res:
2652 res += ', '
2653 res += format_bytes(fdict['filesize'])
9732d77e
PH
2654 elif fdict.get('filesize_approx') is not None:
2655 if res:
2656 res += ', '
2657 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2658 return res
91c7271a 2659
def _format_note_table(self, f):
    """Build the NOTE column used by the table-style format listing.

    The non-empty pieces are joined with ', '.
    """
    pieces = (
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    )
    return ', '.join(piece for piece in pieces if piece != '')
2670
def list_formats(self, info_dict):
    """Print the available formats of info_dict via self.to_screen.

    Uses info_dict['formats'], or info_dict itself as the only format when
    that key is absent. Formats whose 'preference' is below -1000 are not
    listed. The 'listformats_table' param selects the multi-column table
    layout; otherwise the four-column legacy layout is printed.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = self.params.get('listformats_table', False)
    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                # 'protocol' may be absent; fall back to an empty cell
                # instead of calling .replace on None
                (f.get('protocol') or '').replace('http_dash_segments', 'dash').replace("native", "n"),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]
            for f in listed]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in listed]
        header_line = ['format code', 'extension', 'resolution', 'note']

    # if len(formats) > 1:
    #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
            header_line,
            table,
            delim=new_format,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format)))
cfb56d1a
PH
2716
def list_thumbnails(self, info_dict):
    """Print a table of info_dict's thumbnails (id, width, height, URL) via self.to_screen."""
    video_id = info_dict['id'] if info_dict.get('thumbnails') else None
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen('[info] Thumbnails for %s:' % video_id)
    rows = []
    for t in thumbnails:
        rows.append([t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 2728
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a language/formats table for `subtitles` via self.to_screen.

    `name` is the label used in the messages. Each language's formats are
    listed in reverse order of the given list.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        exts = ', '.join(f['ext'] for f in reversed(formats))
        rows.append([lang, exts])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2739
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped into a request object first
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2745
def print_debug_header(self):
    """Write the '[debug]' header lines; a no-op unless the 'verbose' param is set.

    Reports encodings, the program version, lazy-loading/plugin state, the
    git HEAD of the source directory (when available), the Python version,
    external executable versions and the proxy map. With the 'call_home'
    param it also fetches and prints the public IP address.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] yt-dlp version %s\n' % __version__)
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])

    # Best effort: any failure while asking git for HEAD is ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # Only exists on Python 2; the AttributeError on Python 3 is swallowed
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), python_implementation(),
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # NOTE: a "latest version" check used to follow an unconditional
        # `return` here and could never run; that unreachable code has
        # been removed.
        return
2825
def _setup_opener(self):
    """Create self._opener, self.cookiejar and self._socket_timeout from the params.

    Reads the 'socket_timeout' (default 600), 'cookiefile', 'proxy' and
    'debug_printtraffic' params.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 600

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy string means no proxies at all
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def refuse_file_url(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = refuse_file_url

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2878
def encode(self, s):
    """Encode text with self.get_encoding(); bytes are returned unchanged.

    On UnicodeEncodeError a configuration hint is appended to the
    exception's reason before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    # Either already encoded on entry, or encoded just above
    return s
2888
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 2894
de6000d9 2895 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 2896 write_all = self.params.get('write_all_thumbnails', False)
2897 thumbnails = []
2898 if write_all or self.params.get('writethumbnail', False):
0202b52a 2899 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 2900 multiple = write_all and len(thumbnails) > 1
ec82d85a 2901
0202b52a 2902 ret = []
6c4fd172 2903 for t in thumbnails[::1 if write_all else -1]:
ec82d85a 2904 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 2905 suffix = '%s.' % t['id'] if multiple else ''
2906 thumb_display_id = '%s ' % t['id'] if multiple else ''
de6000d9 2907 t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 2908
0c3d0f51 2909 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 2910 ret.append(suffix + thumb_ext)
ec82d85a
PH
2911 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2912 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2913 else:
2914 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2915 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2916 try:
2917 uf = self.urlopen(t['url'])
d3d89c32 2918 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 2919 shutil.copyfileobj(uf, thumbf)
de6000d9 2920 ret.append(suffix + thumb_ext)
ec82d85a
PH
2921 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2922 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2923 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2924 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2925 (t['url'], error_to_compat_str(err)))
6c4fd172 2926 if ret and not write_all:
2927 break
0202b52a 2928 return ret