]> jfr.im git - yt-dlp.git/blame - youtube_dlc/YoutubeDL.py
Merge pull request #187 from pukkandan/break-on-existing
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474
S
29from string import ascii_letters
30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
ce02ed60 54 DownloadError,
c0384f22 55 encode_compat_str,
ce02ed60 56 encodeFilename,
9b9c5355 57 error_to_compat_str,
590bc6f6 58 expand_path,
ce02ed60 59 ExtractorError,
02dbf93f 60 format_bytes,
525ef922 61 formatSeconds,
773f291d 62 GeoRestrictedError,
c9969434 63 int_or_none,
773f291d 64 ISO3166Utils,
ce02ed60 65 locked_file,
dca08720 66 make_HTTPS_handler,
ce02ed60 67 MaxDownloadsReached,
cd6fc19e 68 orderedSet,
b7ab0590 69 PagedList,
083c9df9 70 parse_filesize,
91410c9b 71 PerRequestProxyHandler,
dca08720 72 platform_name,
eedb7ba5 73 PostProcessingError,
ce02ed60 74 preferredencoding,
eedb7ba5 75 prepend_extension,
51fb4995 76 register_socks_protocols,
cfb56d1a 77 render_table,
eedb7ba5 78 replace_extension,
ce02ed60
PH
79 SameFileError,
80 sanitize_filename,
1bb5c511 81 sanitize_path,
dcf77cf1 82 sanitize_url,
67dda517 83 sanitized_Request,
e5660ee6 84 std_headers,
1211bb6d 85 str_or_none,
ce02ed60 86 subtitles_filename,
ce02ed60 87 UnavailableVideoError,
29eb5174 88 url_basename,
58b1f00d 89 version_tuple,
ce02ed60
PH
90 write_json_file,
91 write_string,
1bab3437 92 YoutubeDLCookieJar,
6a3f4c3f 93 YoutubeDLCookieProcessor,
dca08720 94 YoutubeDLHandler,
fca6dba8 95 YoutubeDLRedirectHandler,
ce02ed60 96)
a0e07d31 97from .cache import Cache
e0986e31 98from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
4c54b89e 99from .extractor.openload import PhantomJSwrapper
3bc2ddcc 100from .downloader import get_suitable_downloader
4c83c967 101from .downloader.rtmp import rtmpdump_version
4f026faf 102from .postprocessor import (
f17f8651 103 FFmpegFixupM3u8PP,
62cd676c 104 FFmpegFixupM4aPP,
6271f1ca 105 FFmpegFixupStretchedPP,
4f026faf
PH
106 FFmpegMergerPP,
107 FFmpegPostProcessor,
57df9f53 108 FFmpegSubtitlesConvertorPP,
4f026faf
PH
109 get_postprocessor,
110)
dca08720 111from .version import __version__
8222d8de 112
e9c0cdd3
YCH
113if compat_os_name == 'nt':
114 import ctypes
115
2459b6e1 116
8222d8de
JMF
117class YoutubeDL(object):
118 """YoutubeDL class.
119
120 YoutubeDL objects are the ones responsible of downloading the
121 actual video file and writing it to disk if the user has requested
122 it, among some other tasks. In most cases there should be one per
123 program. As, given a video URL, the downloader doesn't know how to
124 extract all the needed information, task that InfoExtractors do, it
125 has to pass the URL to one of them.
126
127 For this, YoutubeDL objects have a method that allows
128 InfoExtractors to be registered in a given order. When it is passed
129 a URL, the YoutubeDL object handles it to the first InfoExtractor it
130 finds that reports being able to handle it. The InfoExtractor extracts
131 all the information about the video or videos the URL refers to, and
132 YoutubeDL process the extracted information, possibly using a File
133 Downloader to download the video.
134
135 YoutubeDL objects accept a lot of parameters. In order not to saturate
136 the object constructor with arguments, it receives a dictionary of
137 options instead. These options are available through the params
138 attribute for the InfoExtractors to use. The YoutubeDL also
139 registers itself as the downloader in charge for the InfoExtractors
140 that are added to it, so this is a "mutual registration".
141
142 Available options:
143
144 username: Username for authentication purposes.
145 password: Password for authentication purposes.
180940e0 146 videopassword: Password for accessing a video.
1da50aa3
S
147 ap_mso: Adobe Pass multiple-system operator identifier.
148 ap_username: Multiple-system operator account username.
149 ap_password: Multiple-system operator account password.
8222d8de
JMF
150 usenetrc: Use netrc for authentication instead.
151 verbose: Print additional info to stdout.
152 quiet: Do not print messages to stdout.
ad8915b7 153 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
154 forceurl: Force printing final URL.
155 forcetitle: Force printing title.
156 forceid: Force printing ID.
157 forcethumbnail: Force printing thumbnail URL.
158 forcedescription: Force printing description.
159 forcefilename: Force printing final filename.
525ef922 160 forceduration: Force printing duration.
8694c600 161 forcejson: Force printing info_dict as JSON.
63e0be34
PH
162 dump_single_json: Force printing the info_dict of the whole playlist
163 (or video) as a single JSON line.
8222d8de 164 simulate: Do not download the video files.
d8600787 165 format: Video format code. See options.py for more information.
8222d8de 166 outtmpl: Template for output names.
bdc3fd2f
U
167 restrictfilenames: Do not allow "&" and spaces in file names.
168 trim_file_name: Limit length of filename (extension excluded).
8222d8de 169 ignoreerrors: Do not stop on download errors.
d22dec74 170 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
171 nooverwrites: Prevent overwriting files.
172 playliststart: Playlist item to start at.
173 playlistend: Playlist item to end at.
c14e88f0 174 playlist_items: Specific indices of playlist to download.
ff815fe6 175 playlistreverse: Download playlist items in reverse order.
75822ca7 176 playlistrandom: Download playlist items in random order.
8222d8de
JMF
177 matchtitle: Download only matching titles.
178 rejecttitle: Reject downloads for matching titles.
8bf9319e 179 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
180 logtostderr: Log messages to stderr instead of stdout.
181 writedescription: Write the video description to a .description file
182 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 183 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 184 writethumbnail: Write the thumbnail image to a file
ec82d85a 185 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 186 writesubtitles: Write the video subtitles to a file
741dd8ea 187 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 188 allsubtitles: Downloads all the subtitles of the video
0b7f3118 189 (requires writesubtitles or writeautomaticsub)
8222d8de 190 listsubtitles: Lists all available subtitles for the video
a504ced0 191 subtitlesformat: The format code for subtitles
aa6a10c4 192 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
193 keepvideo: Keep the video file after post-processing
194 daterange: A DateRange object, download only if the upload_date is in the range.
195 skip_download: Skip the actual download of the video file
c35f9e72 196 cachedir: Location of the cache files in the filesystem.
a0e07d31 197 False to disable filesystem cache.
47192f92 198 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
199 age_limit: An integer representing the user's age in years.
200 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
201 min_views: An integer representing the minimum view count the video
202 must have in order to not be skipped.
203 Videos without view count information are always
204 downloaded. None for no limit.
205 max_views: An integer representing the maximum view count.
206 Videos that are more popular than that are not
207 downloaded.
208 Videos without view count information are always
209 downloaded. None for no limit.
210 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
211 Videos already present in the file are not downloaded
212 again.
ea6e0c2b 213 break_on_existing: Stop the download process after attempting to download a file that's
214 in the archive.
dca08720 215 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 216 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
217 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
218 At the moment, this is only supported by YouTube.
a1ee09e8 219 proxy: URL of the proxy server to use
38cce791 220 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 221 on geo-restricted sites.
e344693b 222 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
223 bidi_workaround: Work around buggy terminals without bidirectional text
224 support, using fribidi
a0ddb8a2 225 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 226 include_ads: Download ads as well
04b4d394
PH
227 default_search: Prepend this string if an input url is not valid.
228 'auto' for elaborate guessing
62fec3b2 229 encoding: Use this encoding instead of the system-specified.
e8ee972c 230 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
231 Pass in 'in_playlist' to only show this behavior for
232 playlist items.
4f026faf 233 postprocessors: A list of dictionaries, each with an entry
71b640cc 234 * key: The name of the postprocessor. See
cefecac1 235 youtube_dlc/postprocessor/__init__.py for a list.
4f026faf
PH
236 as well as any further keyword arguments for the
237 postprocessor.
71b640cc
PH
238 progress_hooks: A list of functions that get called on download
239 progress, with a dictionary with the entries
5cda4eda 240 * status: One of "downloading", "error", or "finished".
ee69b99a 241 Check this first and ignore unknown values.
71b640cc 242
5cda4eda 243 If status is one of "downloading", or "finished", the
ee69b99a
PH
244 following properties may also be present:
245 * filename: The final filename (always present)
5cda4eda 246 * tmpfilename: The filename we're currently writing to
71b640cc
PH
247 * downloaded_bytes: Bytes on disk
248 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
249 * total_bytes_estimate: Guess of the eventual file size,
250 None if unavailable.
251 * elapsed: The number of seconds since download started.
71b640cc
PH
252 * eta: The estimated time in seconds, None if unknown
253 * speed: The download speed in bytes/second, None if
254 unknown
5cda4eda
PH
255 * fragment_index: The counter of the currently
256 downloaded video fragment.
257 * fragment_count: The number of fragments (= individual
258 files that will be merged)
71b640cc
PH
259
260 Progress hooks are guaranteed to be called at least once
261 (with status "finished") if the download is successful.
45598f15 262 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
263 fixup: Automatically correct known faults of the file.
264 One of:
265 - "never": do nothing
266 - "warn": only emit a warning
267 - "detect_or_warn": check whether we can do anything
62cd676c 268 about it, warn otherwise (default)
504f20dd 269 source_address: Client-side IP address to bind to.
6ec6cb4e 270 call_home: Boolean, true iff we are allowed to contact the
cefecac1 271 youtube-dlc servers for debugging.
7aa589a5
S
272 sleep_interval: Number of seconds to sleep before each download when
273 used alone or a lower bound of a range for randomized
274 sleep before each download (minimum possible number
275 of seconds to sleep) when used along with
276 max_sleep_interval.
277 max_sleep_interval:Upper bound of a range for randomized sleep before each
278 download (maximum possible number of seconds to sleep).
279 Must only be used along with sleep_interval.
280 Actual sleep time will be a random float from range
281 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
282 listformats: Print an overview of available video formats and exit.
283 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
284 match_filter: A function that gets called with the info_dict of
285 every video.
286 If it returns a message, the video is ignored.
287 If it returns None, the video is downloaded.
288 match_filter_func in utils.py is one example for this.
7e5db8c9 289 no_color: Do not emit color codes in output.
0a840f58 290 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 291 HTTP header
0a840f58 292 geo_bypass_country:
773f291d
S
293 Two-letter ISO 3166-2 country code that will be used for
294 explicit geographic restriction bypassing via faking
504f20dd 295 X-Forwarded-For HTTP header
5f95927a
S
296 geo_bypass_ip_block:
297 IP range in CIDR notation that will be used similarly to
504f20dd 298 geo_bypass_country
71b640cc 299
85729c51
PH
300 The following options determine which downloader is picked:
301 external_downloader: Executable of the external downloader to call.
302 None or unset for standard (built-in) downloader.
bf09af3a
S
303 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
304 if True, otherwise use ffmpeg/avconv if False, otherwise
305 use downloader suggested by extractor if None.
fe7e0c98 306
8222d8de 307 The following parameters are not used by YoutubeDL itself, they are used by
cefecac1 308 the downloader (see youtube_dlc/downloader/common.py):
8222d8de 309 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 310 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c
S
311 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
312 http_chunk_size.
76b1bd67
JMF
313
314 The following options are used by the post processors:
d4a24f40
S
315 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
316 otherwise prefer ffmpeg.
c0b7d117
S
317 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
318 to the binary or its containing directory.
f72b0a60
S
319 postprocessor_args: A list of additional command-line arguments for the
320 postprocessor.
3836b02c 321
3600fd59
S
322 The following options are used by the Youtube extractor:
323 youtube_include_dash_manifest: If True (default), DASH manifests and related
324 data will be downloaded and processed by extractor.
325 You can reduce network I/O by disabling it if you don't
326 care about DASH.
8222d8de
JMF
327 """
328
c9969434
S
# Names of info-dict fields that hold numbers. prepare_filename() consults
# this set to patch output-template specifiers of numeric fields that are
# missing from the info dict.
_NUMERIC_FIELDS = set([
    # stream / format properties
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    # dates
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # counters and ratings
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # positions
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
])

# Class-level placeholders; __init__ rebinds every one of them on the instance.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None
346
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:    Dictionary of options (see the class docstring). None is
               treated as an empty dictionary.
    auto_init: If true, print the debug header and register the default
               info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Indexing with the boolean picks stderr when 'logtostderr' is set
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive(self):
        """Preload the archive, if any is specified.

        Returns True if the archive file was read into self.archive,
        False if no archive is configured or the file does not exist yet.
        """
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        # Only announce the load when there actually is an archive to load
        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is fine; anything else is a real error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn and return True if the deprecated parameter `param` is set."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    preload_download_archive(self)

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is given
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            # _bidi_workaround() reads the filtered text back from here
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Copy so the caller's dictionary keeps its 'key' entry
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
461
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a video ID starting with a dash.

    Any argument consisting of a dash followed by exactly ten ID characters
    triggers a warning that shows the command line rewritten with those
    arguments moved behind a '--' separator.
    """
    # short YouTube ID starting with dash?
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    flagged = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not flagged:
        return

    kept = [arg for pos, arg in enumerate(argv) if pos not in flagged]
    moved = [argv[pos] for pos in flagged]
    correct_argv = ['youtube-dlc'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
477
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Instances are additionally indexed by their ie_key() and told that this
    object is their downloader; classes are only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # A class: nothing to index or wire up
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 484
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, one is created from the extractor
    class looked up by ie_key, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
496
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes(),
    in the order they are produced, at the end of the extractor list.
    """
    register = self.add_info_extractor
    for extractor_class in gen_extractor_classes():
        register(extractor_class)
503
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
508
933605d7
JMF
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 512
def _bidi_workaround(self, message):
    """Pass message through the external bidi filter process, if one is set up.

    Without an _output_channel attribute the message is returned unchanged.
    Otherwise the message is written to the filter's stdin and as many lines
    as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    filter_stdin = self._output_process.stdin
    expected_lines = message.count('\n') + 1
    filter_stdin.write((message + '\n').encode('utf-8'))
    filter_stdin.flush()

    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    # Drop the final newline that was appended before sending
    return ''.join(pieces)[:-len('\n')]
1c088fa8 525
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout() with the quiet check enabled."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
529
def _write_string(self, s, out=None):
    """Write s to out through write_string(), using the 'encoding' parameter."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
734f90bb 532
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the configured logger.

    With a 'logger' parameter set, the message goes to logger.debug() only.
    Otherwise it is written to the screen file, unless check_quiet is true
    and the 'quiet' parameter is set. skip_eol suppresses the newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    terminator = ('\n', '')[skip_eol]
    output = self._bidi_workaround(message) + terminator
    self._write_string(output, self._screen_file)
8222d8de
JMF
543
def to_stderr(self, message):
    """Print message to the error file, or to logger.error() if a logger is set."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    output = self._bidi_workaround(message) + '\n'
    self._write_string(output, self._err_file)
8222d8de 553
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message when 'consoletitle' is enabled.

    On Windows ('nt') the title is set through kernel32 if a console window
    exists; elsewhere an escape sequence is written to the screen file,
    provided TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if not kernel32.GetConsoleWindow():
            return
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 564
bdde425c
PH
def save_console_title(self):
    """Push the current terminal title onto the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off, the
    OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
573
def restore_console_title(self):
    """Pop the previously saved terminal title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off, the
    OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
582
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    entered = self
    return entered
586
def __exit__(self, *args):
    """Leave the context: restore the console title and persist cookies.

    The cookie jar is saved only when a 'cookiefile' parameter is set.
    Exceptions raised inside the with-block are not suppressed.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 592
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed too: the given tb, or one built from the exception
    currently being handled, or the current call stack when there is none.

    Unless the 'ignoreerrors' parameter is set, DownloadError is then
    raised; otherwise the download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            handled = sys.exc_info()
            if handled[0]:  # if .trouble has been called from an except block
                tb = ''
                inner = handled[1]
                # Some exceptions carry the exc_info of their original cause
                if hasattr(inner, 'exc_info') and inner.exc_info[0]:
                    tb = tb + ''.join(traceback.format_exception(*inner.exc_info))
                tb = tb + encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
622
def report_warning(self, message):
    '''
    Emit a warning. With a 'logger' parameter set, the message goes to
    logger.warning(). Otherwise it is printed to stderr prefixed with
    'WARNING:' (colored if stderr is a tty, color is allowed and the OS is
    not 'nt'), unless the 'no_warnings' parameter is set.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
639
def report_error(self, message, tb=None):
    '''
    Forward to trouble() with the message prefixed by 'ERROR:'. The prefix
    is colored red if stderr is a tty, color is allowed and the OS is
    not 'nt'.
    '''
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
651
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded.

    If the file name cannot be encoded for output, a generic message
    without the name is printed instead.
    """
    generic_notice = '[download] The file has already been downloaded'
    try:
        named_notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
8222d8de 658
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' parameter (DEFAULT_OUTTMPL if unset) with the
    sanitized fields of info_dict plus the generated fields 'epoch',
    'autonumber' and, when derivable from width/height, 'resolution'.
    Fields missing from info_dict are rendered as 'NA'.

    Returns the sanitized path, or None after reporting an error if the
    template expansion raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Numbers are kept as-is so numeric format specifiers keep working;
        # None values and containers are dropped, the rest is sanitized.
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        # Look the width up per match, so that a template using both fields
        # pads each one with its own width instead of the first match's.
        outtmpl = re.sub(
            FIELD_SIZE_COMPAT_RE,
            lambda mobj: '%%(%s)0%dd' % (
                mobj.group('field'), field_size_compat_map[mobj.group('field')]),
            outtmpl)

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string 'NA' is returned for missing fields. We will patch output
        # template for missing fields to meet string presentation type.
        # As of [1] format syntax is:
        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            [diouxXeEfFgGcrs%]  # conversion type
        '''
        for numeric_field in self._NUMERIC_FIELDS:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(numeric_field),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            fn_groups = filename.split('.')
            # Up to two trailing dot-separated groups (sub-extension and
            # extension) are kept verbatim; everything before them is the
            # part that gets trimmed. A name without any dot has no
            # extension, and earlier dot-separated segments are never dropped.
            n_suffixes = min(len(fn_groups) - 1, 2)
            suffixes = fn_groups[len(fn_groups) - n_suffixes:]
            stem = '.'.join(fn_groups[:len(fn_groups) - n_suffixes])
            filename = '.'.join(filter(None, [stem[:trim_file_name]] + suffixes))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
757
442c37b7 758 def _match_entry(self, info_dict, incomplete):
ecdec191 759 """ Returns None if the file should be downloaded """
8222d8de 760
6febd1c1 761 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
762 if 'title' in info_dict:
763 # This can happen when we're just evaluating the playlist
764 title = info_dict['title']
765 matchtitle = self.params.get('matchtitle', False)
766 if matchtitle:
767 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 768 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
769 rejecttitle = self.params.get('rejecttitle', False)
770 if rejecttitle:
771 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 772 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
d800609c 773 date = info_dict.get('upload_date')
8222d8de
JMF
774 if date is not None:
775 dateRange = self.params.get('daterange', DateRange())
776 if date not in dateRange:
6febd1c1 777 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
d800609c 778 view_count = info_dict.get('view_count')
5fe18bdb
PH
779 if view_count is not None:
780 min_views = self.params.get('min_views')
781 if min_views is not None and view_count < min_views:
6febd1c1 782 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
783 max_views = self.params.get('max_views')
784 if max_views is not None and view_count > max_views:
6febd1c1 785 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
05900629 786 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
347de493 787 return 'Skipping "%s" because it is age restricted' % video_title
c1c9a79c 788 if self.in_download_archive(info_dict):
6febd1c1 789 return '%s has already been recorded in archive' % video_title
347de493 790
442c37b7
PH
791 if not incomplete:
792 match_filter = self.params.get('match_filter')
793 if match_filter is not None:
794 ret = match_filter(info_dict)
795 if ret is not None:
796 return ret
347de493 797
8222d8de 798 return None
fe7e0c98 799
b6c45014
JMF
800 @staticmethod
801 def add_extra_info(info_dict, extra_info):
802 '''Set the keys from extra_info in info dict if they are missing'''
803 for key, value in extra_info.items():
804 info_dict.setdefault(key, value)
805
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Extract information for url with the first suitable info extractor.

    url -- the URL to extract from
    download -- passed on to process_ie_result
    ie_key -- restrict extraction to the extractor with this key
    info_dict -- if given, its non-empty 'id' and 'title' override the
                 values of the extracted result
    extra_info -- dict with extra values, passed on to process_ie_result
    process -- when false the raw extractor result is returned instead
               of the outcome of process_ie_result
    force_generic_extractor -- use the 'Generic' extractor unless ie_key
                               is given

    Returns None when no extractor is suitable, the video is already in
    the download archive, the extractor returned nothing or a reported
    error stopped the extraction.
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Every path below leaves the loop: only the first suitable
        # extractor is ever tried.
        try:
            # Cheap archive check based on the id found in the URL, done
            # before any network access
            try:
                if callable(getattr(ie, 'extract_id', None)):
                    temp_id = ie.extract_id(url)
                else:
                    temp_id = ie._match_id(url)
            except (AssertionError, IndexError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break

            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            if info_dict:
                for forced_field in ('id', 'title'):
                    if info_dict.get(forced_field):
                        ie_result[forced_field] = info_dict[forced_field]
            self.add_default_extra_info(ie_result, ie, url)
            if not process:
                return ie_result
            return self.process_ie_result(ie_result, download, extra_info)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
            break
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            break
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 882
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor name/key and webpage URL fields that ie_result lacks"""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    # add_extra_info never overwrites keys already present in ie_result
    self.add_extra_info(ie_result, defaults)
890
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result -- extractor result; its '_type' (default 'video') selects
                 the handling: 'video', 'url', 'url_transparent',
                 'playlist'/'multi_video' or 'compat_list'
    download -- passed on to the video/entry processing
    extra_info -- extra values added to video results; it is only read,
                  never modified, here

    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range, anything
                # else is a single 1-based index
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Requested (1-based) items outside of the list are dropped
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        # Must stay in sync with the message _match_entry returns for
        # entries found in the download archive
        archived_suffix = 'has already been recorded in archive'

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                if self.params.get('break_on_existing') and reason.endswith(archived_suffix):
                    self.to_screen('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                    break
                self.to_screen('[download] ' + reason)
                continue

            # Only entries that are actually processed need the playlist
            # metadata
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1090
67134eab
JMF
1091 def _build_format_filter(self, filter_spec):
1092 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1093
1094 OPERATORS = {
1095 '<': operator.lt,
1096 '<=': operator.le,
1097 '>': operator.gt,
1098 '>=': operator.ge,
1099 '=': operator.eq,
1100 '!=': operator.ne,
1101 }
67134eab 1102 operator_rex = re.compile(r'''(?x)\s*
a03a3c80 1103 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
083c9df9
PH
1104 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1105 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1106 $
083c9df9 1107 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1108 m = operator_rex.search(filter_spec)
9ddb6925
S
1109 if m:
1110 try:
1111 comparison_value = int(m.group('value'))
1112 except ValueError:
1113 comparison_value = parse_filesize(m.group('value'))
1114 if comparison_value is None:
1115 comparison_value = parse_filesize(m.group('value') + 'B')
1116 if comparison_value is None:
1117 raise ValueError(
1118 'Invalid value %r in format specification %r' % (
67134eab 1119 m.group('value'), filter_spec))
9ddb6925
S
1120 op = OPERATORS[m.group('op')]
1121
083c9df9 1122 if not m:
9ddb6925
S
1123 STR_OPERATORS = {
1124 '=': operator.eq,
10d33b34
YCH
1125 '^=': lambda attr, value: attr.startswith(value),
1126 '$=': lambda attr, value: attr.endswith(value),
1127 '*=': lambda attr, value: value in attr,
9ddb6925 1128 }
67134eab 1129 str_operator_rex = re.compile(r'''(?x)
d5aacf9a 1130 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
2cc779f4 1131 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1132 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1133 \s*$
9ddb6925 1134 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1135 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1136 if m:
1137 comparison_value = m.group('value')
2cc779f4
S
1138 str_op = STR_OPERATORS[m.group('op')]
1139 if m.group('negation'):
e118a879 1140 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1141 else:
1142 op = str_op
083c9df9 1143
9ddb6925 1144 if not m:
67134eab 1145 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1146
1147 def _filter(f):
1148 actual_value = f.get(m.group('key'))
1149 if actual_value is None:
1150 return m.group('none_inclusive')
1151 return op(actual_value, comparison_value)
67134eab
JMF
1152 return _filter
1153
0017d9ad 1154 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1155
af0f7428
S
1156 def can_merge():
1157 merger = FFmpegMergerPP(self)
1158 return merger.available and merger.can_merge()
1159
1160 def prefer_best():
0017d9ad 1161 if self.params.get('simulate', False):
af0f7428 1162 return False
0017d9ad 1163 if not download:
0017d9ad 1164 return False
af0f7428
S
1165 if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1166 return True
0017d9ad 1167 if info_dict.get('is_live'):
af0f7428
S
1168 return True
1169 if not can_merge():
1170 return True
1171 return False
1172
1173 req_format_list = ['bestvideo+bestaudio', 'best']
1174 if prefer_best():
1175 req_format_list.reverse()
0017d9ad
S
1176 return '/'.join(req_format_list)
1177
67134eab
JMF
def build_format_selector(self, format_spec):
    """
    Compile format_spec into a selector function.

    The specification is tokenized and parsed into FormatSelector tuples
    ('/' picks the first alternative yielding formats, '+' merges, ','
    lists several selections, '(...)' groups and '[...]' attaches filters)
    which are then turned into a function. That function takes a ctx dict
    (the visible code reads its 'formats' and 'incomplete_formats' keys)
    and returns an iterable of the selected format dicts.

    Raises SyntaxError if format_spec can not be parsed.
    """
    def syntax_error(note, start):
        # start is a (row, column) token position; the column is used to
        # place the '^' marker under the offending character
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Concatenate everything up to the closing bracket
        pieces = []
        for tok_type, tok_string, _, _, _ in tokens:
            if tok_type == tokenize.OP and tok_string == ']':
                return ''.join(pieces)
            pieces.append(tok_string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string = last_start = last_end = last_line = None
        for tok_type, tok_string, start, end, line in tokens:
            if tok_type == tokenize.OP and tok_string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, tok_string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, tok_string, start, end, line in tokens:
                    yield tok_type, tok_string, start, end, line
                    if tok_type == tokenize.OP and tok_string == ']':
                        break
            elif tok_type == tokenize.OP and tok_string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, tok_string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if last_string:
                    last_string += tok_string
                else:
                    last_string = tok_string
                    last_start = start
                    last_end = end
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, tok_string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, tok_string, [])
            elif tok_type == tokenize.OP:
                if tok_string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and tok_string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and tok_string == ',':
                    tokens.restore_last_token()
                    break
                elif tok_string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif tok_string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif tok_string == '[':
                    if not current_selector:
                        # A bare filter applies to the 'best' selector
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    current_selector.filters.append(_parse_filter(tokens))
                elif tok_string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif tok_string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(tok_string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for picked in f(ctx):
                        yield picked
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # The first alternative that yields anything wins
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            single_spec = selector.selector

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if not formats:
                    return
                if single_spec == 'all':
                    for f in formats:
                        yield f
                elif single_spec in ('best', 'worst', None):
                    format_idx = 0 if single_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) we will fallback to best/worst
                    # {video,audio}-only format
                    elif ctx['incomplete_formats']:
                        yield formats[format_idx]
                elif single_spec in ('bestaudio', 'worstaudio'):
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1 if single_spec == 'bestaudio' else 0]
                elif single_spec in ('bestvideo', 'worstvideo'):
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1 if single_spec == 'bestvideo' else 0]
                else:
                    # Either a known extension or an explicit format id;
                    # the last match is used
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    lookup_key = 'ext' if single_spec in extensions else 'format_id'
                    matches = [f for f in formats if f[lookup_key] == single_spec]
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_pair):
                format_1, format_2 = formats_pair

                # Already merged formats contribute their components
                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a copy so the caller's ctx is left untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token, so
        # a nested parse can hand a token back to its caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1456
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """
    Build the HTTP headers for info_dict.

    Starts from a copy of std_headers, applies info_dict's 'http_headers',
    sets 'Cookie' from _calc_cookies and, unless already present, sets
    'X-Forwarded-For' from info_dict's '__x_forwarded_for_ip'.
    """
    headers = std_headers.copy()

    custom_headers = info_dict.get('http_headers')
    if custom_headers:
        headers.update(custom_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly given X-Forwarded-For header takes precedence
    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers
1474
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookiejar adds to a request for info_dict['url']"""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
e5660ee6 1479
dd82ffea
JMF
1480 def process_video_result(self, info_dict, download=True):
1481 assert info_dict.get('_type', 'video') == 'video'
1482
bec1fad2
PH
1483 if 'id' not in info_dict:
1484 raise ExtractorError('Missing "id" field in extractor result')
1485 if 'title' not in info_dict:
1486 raise ExtractorError('Missing "title" field in extractor result')
1487
c9969434
S
1488 def report_force_conversion(field, field_not, conversion):
1489 self.report_warning(
1490 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1491 % (field, field_not, conversion))
1492
1493 def sanitize_string_field(info, string_field):
1494 field = info.get(string_field)
1495 if field is None or isinstance(field, compat_str):
1496 return
1497 report_force_conversion(string_field, 'a string', 'string')
1498 info[string_field] = compat_str(field)
1499
1500 def sanitize_numeric_fields(info):
1501 for numeric_field in self._NUMERIC_FIELDS:
1502 field = info.get(numeric_field)
1503 if field is None or isinstance(field, compat_numeric_types):
1504 continue
1505 report_force_conversion(numeric_field, 'numeric', 'int')
1506 info[numeric_field] = int_or_none(field)
1507
1508 sanitize_string_field(info_dict, 'id')
1509 sanitize_numeric_fields(info_dict)
be6217b2 1510
dd82ffea
JMF
1511 if 'playlist' not in info_dict:
1512 # It isn't part of a playlist
1513 info_dict['playlist'] = None
1514 info_dict['playlist_index'] = None
1515
d5519808 1516 thumbnails = info_dict.get('thumbnails')
cfb56d1a
PH
1517 if thumbnails is None:
1518 thumbnail = info_dict.get('thumbnail')
1519 if thumbnail:
a7a14d95 1520 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
d5519808 1521 if thumbnails:
be6d7229 1522 thumbnails.sort(key=lambda t: (
d37708fc
RA
1523 t.get('preference') if t.get('preference') is not None else -1,
1524 t.get('width') if t.get('width') is not None else -1,
1525 t.get('height') if t.get('height') is not None else -1,
1526 t.get('id') if t.get('id') is not None else '', t.get('url')))
f6c24009 1527 for i, t in enumerate(thumbnails):
dcf77cf1 1528 t['url'] = sanitize_url(t['url'])
9603e8a7 1529 if t.get('width') and t.get('height'):
d5519808 1530 t['resolution'] = '%dx%d' % (t['width'], t['height'])
f6c24009
PH
1531 if t.get('id') is None:
1532 t['id'] = '%d' % i
d5519808 1533
b7b72db9 1534 if self.params.get('list_thumbnails'):
1535 self.list_thumbnails(info_dict)
1536 return
1537
536a55da
S
1538 thumbnail = info_dict.get('thumbnail')
1539 if thumbnail:
1540 info_dict['thumbnail'] = sanitize_url(thumbnail)
1541 elif thumbnails:
d5519808
PH
1542 info_dict['thumbnail'] = thumbnails[-1]['url']
1543
c9ae7b95 1544 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1545 info_dict['display_id'] = info_dict['id']
1546
955c4514 1547 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
a55e36f4
S
1548 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1549 # see http://bugs.python.org/issue1646728)
1550 try:
1551 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1552 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1553 except (ValueError, OverflowError, OSError):
1554 pass
9d2ecdbc 1555
33d2fc2f
S
1556 # Auto generate title fields corresponding to the *_number fields when missing
1557 # in order to always have clean titles. This is very common for TV series.
1558 for field in ('chapter', 'season', 'episode'):
1559 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1560 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1561
05108a49
S
1562 for cc_kind in ('subtitles', 'automatic_captions'):
1563 cc = info_dict.get(cc_kind)
1564 if cc:
1565 for _, subtitle in cc.items():
1566 for subtitle_format in subtitle:
1567 if subtitle_format.get('url'):
1568 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1569 if subtitle_format.get('ext') is None:
1570 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1571
1572 automatic_captions = info_dict.get('automatic_captions')
4bba3716 1573 subtitles = info_dict.get('subtitles')
4bba3716 1574
a504ced0 1575 if self.params.get('listsubtitles', False):
360e1ca5 1576 if 'automatic_captions' in info_dict:
05108a49
S
1577 self.list_subtitles(
1578 info_dict['id'], automatic_captions, 'automatic captions')
4bba3716 1579 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
a504ced0 1580 return
05108a49 1581
360e1ca5 1582 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 1583 info_dict['id'], subtitles, automatic_captions)
a504ced0 1584
dd82ffea
JMF
1585 # We now pick which formats have to be downloaded
1586 if info_dict.get('formats') is None:
1587 # There's only one format available
1588 formats = [info_dict]
1589 else:
1590 formats = info_dict['formats']
1591
db95dc13
PH
1592 if not formats:
1593 raise ExtractorError('No video formats found!')
1594
73af5cc8
S
1595 def is_wellformed(f):
1596 url = f.get('url')
a5ac0c47 1597 if not url:
73af5cc8
S
1598 self.report_warning(
1599 '"url" field is missing or empty - skipping format, '
1600 'there is an error in extractor')
a5ac0c47
S
1601 return False
1602 if isinstance(url, bytes):
1603 sanitize_string_field(f, 'url')
1604 return True
73af5cc8
S
1605
1606 # Filter out malformed formats for better extraction robustness
1607 formats = list(filter(is_wellformed, formats))
1608
181c7053
S
1609 formats_dict = {}
1610
dd82ffea 1611 # We check that all the formats have the format and format_id fields
db95dc13 1612 for i, format in enumerate(formats):
c9969434
S
1613 sanitize_string_field(format, 'format_id')
1614 sanitize_numeric_fields(format)
dcf77cf1 1615 format['url'] = sanitize_url(format['url'])
e74e3b63 1616 if not format.get('format_id'):
8016c922 1617 format['format_id'] = compat_str(i)
e2effb08
S
1618 else:
1619 # Sanitize format_id from characters used in format selector expression
ec85ded8 1620 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
1621 format_id = format['format_id']
1622 if format_id not in formats_dict:
1623 formats_dict[format_id] = []
1624 formats_dict[format_id].append(format)
1625
1626 # Make sure all formats have unique format_id
1627 for format_id, ambiguous_formats in formats_dict.items():
1628 if len(ambiguous_formats) > 1:
1629 for i, format in enumerate(ambiguous_formats):
1630 format['format_id'] = '%s-%d' % (format_id, i)
1631
1632 for i, format in enumerate(formats):
8c51aa65 1633 if format.get('format') is None:
6febd1c1 1634 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
1635 id=format['format_id'],
1636 res=self.format_resolution(format),
6febd1c1 1637 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 1638 )
c1002e96 1639 # Automatically determine file extension if missing
5b1d8575 1640 if format.get('ext') is None:
cce929ea 1641 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
1642 # Automatically determine protocol if missing (useful for format
1643 # selection purposes)
6f0be937 1644 if format.get('protocol') is None:
b5559424 1645 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
1646 # Add HTTP headers, so that external programs can use them from the
1647 # json output
1648 full_format_info = info_dict.copy()
1649 full_format_info.update(format)
1650 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
1651 # Remove private housekeeping stuff
1652 if '__x_forwarded_for_ip' in info_dict:
1653 del info_dict['__x_forwarded_for_ip']
dd82ffea 1654
4bcc7bd1 1655 # TODO Central sorting goes here
99e206d5 1656
f89197d7 1657 if formats[0] is not info_dict:
b3d9ef88
JMF
1658 # only set the 'formats' fields if the original info_dict list them
1659 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 1660 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 1661 # which can't be exported to json
b3d9ef88 1662 info_dict['formats'] = formats
cfb56d1a 1663 if self.params.get('listformats'):
bfaae0a7 1664 self.list_formats(info_dict)
1665 return
1666
de3ef3ed 1667 req_format = self.params.get('format')
a9c58ad9 1668 if req_format is None:
0017d9ad
S
1669 req_format = self._default_format_spec(info_dict, download=download)
1670 if self.params.get('verbose'):
1671 self.to_stdout('[debug] Default format spec: %s' % req_format)
1672
5acfa126 1673 format_selector = self.build_format_selector(req_format)
317f7ab6
S
1674
1675 # While in format selection we may need to have an access to the original
1676 # format set in order to calculate some metrics or do some processing.
1677 # For now we need to be able to guess whether original formats provided
1678 # by extractor are incomplete or not (i.e. whether extractor provides only
1679 # video-only or audio-only formats) for proper formats selection for
1680 # extractors with such incomplete formats (see
067aa17e 1681 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
1682 # Since formats may be filtered during format selection and may not match
1683 # the original formats the results may be incorrect. Thus original formats
1684 # or pre-calculated metrics should be passed to format selection routines
1685 # as well.
1686 # We will pass a context object containing all necessary additional data
1687 # instead of just formats.
1688 # This fixes incorrect format selection issue (see
067aa17e 1689 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 1690 incomplete_formats = (
317f7ab6 1691 # All formats are video-only or
3089bc74 1692 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 1693 # all formats are audio-only
3089bc74 1694 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
1695
1696 ctx = {
1697 'formats': formats,
1698 'incomplete_formats': incomplete_formats,
1699 }
1700
1701 formats_to_download = list(format_selector(ctx))
dd82ffea 1702 if not formats_to_download:
6febd1c1 1703 raise ExtractorError('requested format not available',
78a3a9f8 1704 expected=True)
dd82ffea
JMF
1705
1706 if download:
1707 if len(formats_to_download) > 1:
6febd1c1 1708 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
1709 for format in formats_to_download:
1710 new_info = dict(info_dict)
1711 new_info.update(format)
1712 self.process_info(new_info)
1713 # We update the info dict with the best quality format (backwards compatibility)
1714 info_dict.update(formats_to_download[-1])
1715 return info_dict
1716
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id is only used in warning messages.
    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each with at least an 'ext' key); either may
    be None or empty.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Regular subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    # Nothing can be collected above unless at least one of the two
    # "write" options is enabled, so this single check also covers the
    # "no subtitles requested" case.
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = list(available_subs)
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs)[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is as useless as a missing language; without this
        # check formats[-1] below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched (or none was given): fall back to the
            # last listed format and tell the user about it
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
1765
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout every value requested through a 'force*' parameter.

    When incomplete is true, missing mandatory fields and the URL are
    silently skipped instead of being looked up.
    """
    def is_forced(name):
        return self.params.get('force%s' % name, False)

    def emit(field, mandatory):
        if not is_forced(field):
            return
        # A mandatory field of a complete result is printed
        # unconditionally (a missing key raises KeyError); everything else
        # is printed only when a value is present.
        if info_dict.get(field) is not None or (mandatory and not incomplete):
            self.to_stdout(info_dict[field])

    emit('title', True)
    emit('id', True)
    if is_forced('url') and not incomplete:
        requested = info_dict.get('requested_formats')
        if requested is None:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        else:
            for fmt in requested:
                self.to_stdout(fmt['url'] + fmt.get('play_path', ''))
    emit('thumbnail', False)
    emit('description', False)
    if is_forced('filename') and filename is not None:
        self.to_stdout(filename)
    if is_forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    emit('format', True)
    if is_forced('json'):
        self.to_stdout(json.dumps(info_dict))
1795
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Depending on self.params this prints the forced fields, writes the
    description, annotations, subtitles, info JSON and thumbnails next to
    the target file, downloads the media (several formats when
    'requested_formats' is set), schedules fixup postprocessors, runs the
    postprocessors and records the video in the download archive.

    info_dict is modified in place ('fulltitle', 'format', '_filename',
    '__postprocessors', '__files_to_merge' and possibly 'ext').

    Raises MaxDownloadsReached when the download counter has reached the
    'max_downloads' parameter, and UnavailableVideoError when the download
    fails with an OSError/IOError. Most other failures are reported via
    report_error()/report_warning() and end the method early.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None reason means the entry is rejected; it is shown to the user
    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    self.__forced_printings(info_dict, filename, incomplete=False)

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    def ensure_dir_exists(path):
        # Create the parent directory of path if needed; returns False
        # (after reporting the error) when it cannot be created
        try:
            dn = os.path.dirname(path)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            return True
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return False

    if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
        return

    # Write the description file
    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    # Write the annotations file
    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    def dl(name, info, subtitle=False):
        # Pick a downloader suitable for info, attach the registered
        # progress hooks and return the result of its download() call
        fd = get_suitable_downloader(info, self.params)(self, self.params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        if self.params.get('verbose'):
            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
        return fd.download(name, info, subtitle)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    # The subtitle content is already available: write it out directly
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    # No inline data: fetch the subtitle through the downloader.
                    # The triple-quoted string below is a no-op expression that
                    # keeps a disabled alternative implementation around.
                    try:
                        dl(sub_filename, sub_info, subtitle=True)
                        '''
                        if self.params.get('sleep_interval_subtitles', False):
                            dl(sub_filename, sub_info)
                        else:
                            sub_data = ie._request_webpage(
                                sub_info['url'], info_dict['id'], note=False).read()
                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                subfile.write(sub_data)
                        '''
                    except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed subtitle download is not fatal: warn and go on
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    # With --skip-download the postprocessors are run here only when
    # subtitle conversion was requested and the converted file is missing
    if self.params.get('skip_download', False):
        if self.params.get('convertsubtitles', False):
            subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
            filename_real_ext = os.path.splitext(filename)[1][1:]
            filename_wo_ext = (
                os.path.splitext(filename)[0]
                if filename_real_ext == info_dict['ext']
                else filename)
            afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
            if subconv.available:
                info_dict.setdefault('__postprocessors', [])
                # info_dict['__postprocessors'].append(subconv)
            if os.path.exists(encodeFilename(afilename)):
                self.to_screen(
                    '[download] %s has already been downloaded and '
                    'converted' % afilename)
            else:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

    # Write the info JSON (without the selection-specific keys)
    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            if info_dict.get('requested_formats') is not None:
                # Several formats were requested: download each one to its
                # own file and leave the merge to the postprocessor chain
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                # The extension-less name is computed before 'ext' may be
                # switched to mkv below
                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = prepend_extension(
                            self.prepare_filename(new_info),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            # Fixup 1: non-uniform pixel ratio
            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            # Fixup 2: DASH m4a container (single-format downloads only)
            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            # Fixup 3: HLS downloads. Note the precedence: 'and' binds
            # tighter than 'or', so this reads as
            # m3u8_native or (m3u8 and hls_prefer_native)
            if (info_dict.get('protocol') == 'm3u8_native'
                    or info_dict.get('protocol') == 'm3u8'
                    and self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            # Only reached after a successful download and postprocessing
            self.record_download_archive(info_dict)
8222d8de
JMF
2106
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would all be written to one
    fixed output file name. Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # A template without any '%' field (and not stdout) names a single file
    is_fixed_name = template != '-' and '%' not in template
    if (len(url_list) > 1
            and is_fixed_name
            and self.params.get('max_downloads') != 1):
        raise SameFileError(template)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
2131
def download_with_info_file(self, info_filename):
    """Process a previously saved info JSON file as if it were just extracted.

    If processing raises DownloadError and the info contains a
    'webpage_url', that URL is downloaded instead; without one the error
    is re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(raw_info)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2148
cb202fd2
S
2149 @staticmethod
2150 def filter_requested_info(info_dict):
2151 return dict(
2152 (k, v) for k, v in info_dict.items()
2153 if k not in ['requested_formats', 'requested_subtitles'])
2154
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed under ie_info['__postprocessors'] run first,
    followed by those in self._pps. Each receives the info dict returned
    by the previous one. Files a postprocessor reports as obsolete are
    removed unless the 'keepvideo' parameter is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    chain = [] if per_video_pps is None else list(per_video_pps)
    chain.extend(self._pps)

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            # Report and carry on with the remaining postprocessors
            self.report_error(e.msg)
        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in set(obsolete_files):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 2176
def _make_archive_id(self, info_dict):
    """Build the download-archive entry '<extractor key, lowercased> <video id>'.

    Returns None when the video id is missing, or when no extractor key is
    given and none of the registered extractors matches the URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    ie_key = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if ie_key is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        matching_ie = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching_ie is None:
            return
        ie_key = matching_ie.ie_key()
    return ie_key.lower() + ' ' + video_id
5db07df6
PH
2196
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    Always False when no 'download_archive' parameter is configured or
    when no archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A falsy id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
2207
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' parameter is configured. The
    id is also added to the in-memory self.archive set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
        self.archive.add(archive_id)
dd82ffea 2217
8c51aa65 2218 @staticmethod
8abeeb94 2219 def format_resolution(format, default='unknown'):
fb04e403
PH
2220 if format.get('vcodec') == 'none':
2221 return 'audio only'
f49d89ee
PH
2222 if format.get('resolution') is not None:
2223 return format['resolution']
8c51aa65
JMF
2224 if format.get('height') is not None:
2225 if format.get('width') is not None:
6febd1c1 2226 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 2227 else:
6febd1c1 2228 res = '%sp' % format['height']
f49d89ee 2229 elif format.get('width') is not None:
388ae76b 2230 res = '%dx?' % format['width']
8c51aa65 2231 else:
8abeeb94 2232 res = default
8c51aa65
JMF
2233 return res
2234
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-text note shown for a format in the formats table.

    The note is assembled from whichever of these keys are present in
    fdict: ext, language, format_note, tbr, container, vcodec, vbr, fps,
    acodec, abr, asr and filesize/filesize_approx.
    """
    # Single-slot holder so the nested helper can extend the text
    # (there is no 'nonlocal' on Python 2)
    note = ['']

    def add(text, sep=''):
        # The separator is only written when the note is not empty yet
        if note[0]:
            note[0] += sep
        note[0] += text

    if fdict.get('ext') in ['f4f', 'f4m']:
        add('(unsupported) ')
    if fdict.get('language'):
        add('[%s] ' % fdict['language'], sep=' ')
    if fdict.get('format_note') is not None:
        add(fdict['format_note'] + ' ')
    if fdict.get('tbr') is not None:
        add('%4dk ' % fdict['tbr'])
    if fdict.get('container') is not None:
        add('%s container' % fdict['container'], sep=', ')

    vcodec = fdict.get('vcodec')
    vbr = fdict.get('vbr')
    abr = fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        add(vcodec, sep=', ')
        if vbr is not None:
            add('@')
    elif vbr is not None and abr is not None:
        add('video@')
    if vbr is not None:
        add('%4dk' % vbr)
    if fdict.get('fps') is not None:
        add('%sfps' % fdict['fps'], sep=', ')

    acodec = fdict.get('acodec')
    if acodec is not None:
        if acodec == 'none':
            add('video only', sep=', ')
        else:
            add('%-5s' % acodec, sep=', ')
    elif abr is not None:
        add('audio', sep=', ')
    if abr is not None:
        add('@%3dk' % abr)
    if fdict.get('asr') is not None:
        add(' (%5dHz)' % fdict['asr'])

    if fdict.get('filesize') is not None:
        add(format_bytes(fdict['filesize']), sep=', ')
    elif fdict.get('filesize_approx') is not None:
        add('~' + format_bytes(fdict['filesize_approx']), sep=', ')
    return note[0]
91c7271a 2290
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    Formats are read from info_dict['formats']; when that key is absent
    info_dict itself is treated as the only format.  Formats whose
    'preference' is below -1000 are left out of the table.  When more
    than one format exists, the last row shown is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter above can remove every row; without the
    # `table` check the '(best)' tagging would raise IndexError.
    if len(formats) > 1 and table:
        best_note = table[-1][-1]
        table[-1][-1] = best_note + (' ' if best_note else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
2304
def list_thumbnails(self, info_dict):
    """Print the thumbnails listed in info_dict['thumbnails'].

    Emits a single notice when the list is missing or empty; otherwise
    prints a heading followed by a table with each thumbnail's id,
    width, height and URL ('unknown' stands in for a missing width or
    height).
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])
        rows = []
        for thumb in thumbnails:
            rows.append([
                thumb['id'],
                thumb.get('width', 'unknown'),
                thumb.get('height', 'unknown'),
                thumb['url'],
            ])
        self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
    else:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
dca08720 2316
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitle languages and formats available for a video.

    subtitles maps a language to a list of format dicts, each with an
    'ext' key; the extensions are listed in reverse of the given order.
    name is the label used in the messages.  When subtitles is empty
    only a "has no ..." notice is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(extensions)])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2327
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    req is either a request object or a URL string; a string is first
    wrapped with sanitized_Request.  The request is opened through
    self._opener using self._socket_timeout as the timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2333
def print_debug_header(self):
    """Write diagnostic information about the runtime environment.

    Does nothing unless the 'verbose' param is set.  Otherwise reports,
    in order: the encodings in use, the program version, whether lazy
    extractor loading is enabled, the git HEAD of the source directory
    (when it can be determined), the Python version and platform, the
    versions of the external executables, and the proxy map of the
    opener.  With the 'call_home' param it additionally fetches and
    prints the public IP address and warns when a newer version is
    published.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')

    # Best effort only: any failure while asking git for the current
    # commit is ignored.
    try:
        source_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=source_dir)
        git_head = git.communicate()[0].decode().strip()
        if re.match('[0-9a-f]+', git_head):
            self._write_string('[debug] Git HEAD: ' + git_head + '\n')
    except Exception:
        try:
            # Only exists on Python 2; the resulting error elsewhere is
            # swallowed as well.
            sys.exc_clear()
        except Exception:
            pass

    def describe_implementation():
        name = platform.python_implementation()
        if name != 'PyPy' or not hasattr(sys, 'pypy_version_info'):
            return name
        return name + ' version %d.%d.%d' % sys.pypy_version_info[:3]

    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), describe_implementation(),
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables with a falsy version value are left out of the list.
    available = [
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version]
    self._write_string(
        '[debug] exe versions: %s\n' % (', '.join(available) or 'none'))

    proxy_map = {}
    for handler in self._opener.handlers:
        if not hasattr(handler, 'proxies'):
            continue
        proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return

    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
2409
def _setup_opener(self):
    """Create the URL opener used for all requests and store it on self.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params and sets self._socket_timeout,
    self.cookiejar and self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    cookiefile = self.params.get('cookiefile')
    proxy = self.params.get('proxy')

    if cookiefile is not None:
        cookiefile = expand_path(cookiefile)
        self.cookiejar = YoutubeDLCookieJar(cookiefile)
        # Cookies are only loaded when the file is readable.
        if os.access(cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy == '':
        # An explicitly empty proxy setting means no proxies at all.
        proxies = {}
    else:
        proxies = {'http': proxy, 'https': proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    handlers = [
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler,
    ]
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2462
def encode(self, s):
    """Return s as bytes, encoded with self.get_encoding().

    Bytes input is returned unchanged.  If encoding fails, the
    UnicodeEncodeError is re-raised with a hint about the encoding
    configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
2472
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
2478
2479 def _write_thumbnails(self, info_dict, filename):
2480 if self.params.get('writethumbnail', False):
2481 thumbnails = info_dict.get('thumbnails')
2482 if thumbnails:
2483 thumbnails = [thumbnails[-1]]
2484 elif self.params.get('write_all_thumbnails', False):
2485 thumbnails = info_dict.get('thumbnails')
2486 else:
2487 return
2488
2489 if not thumbnails:
2490 # No thumbnails present, so return immediately
2491 return
2492
2493 for t in thumbnails:
2494 thumb_ext = determine_ext(t['url'], 'jpg')
2495 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2496 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2497 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2498
2499 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2500 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2501 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2502 else:
2503 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2504 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2505 try:
2506 uf = self.urlopen(t['url'])
d3d89c32 2507 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2508 shutil.copyfileobj(uf, thumbf)
2509 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2510 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2511 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2512 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2513 (t['url'], error_to_compat_str(err)))