]> jfr.im git - yt-dlp.git/blame - youtube_dlc/YoutubeDL.py
[skip travis] finalised workflow
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474
S
29from string import ascii_letters
30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
ce02ed60 54 DownloadError,
c0384f22 55 encode_compat_str,
ce02ed60 56 encodeFilename,
9b9c5355 57 error_to_compat_str,
590bc6f6 58 expand_path,
ce02ed60 59 ExtractorError,
02dbf93f 60 format_bytes,
525ef922 61 formatSeconds,
773f291d 62 GeoRestrictedError,
c9969434 63 int_or_none,
773f291d 64 ISO3166Utils,
ce02ed60 65 locked_file,
dca08720 66 make_HTTPS_handler,
ce02ed60 67 MaxDownloadsReached,
cd6fc19e 68 orderedSet,
b7ab0590 69 PagedList,
083c9df9 70 parse_filesize,
91410c9b 71 PerRequestProxyHandler,
dca08720 72 platform_name,
eedb7ba5 73 PostProcessingError,
ce02ed60 74 preferredencoding,
eedb7ba5 75 prepend_extension,
51fb4995 76 register_socks_protocols,
cfb56d1a 77 render_table,
eedb7ba5 78 replace_extension,
ce02ed60
PH
79 SameFileError,
80 sanitize_filename,
1bb5c511 81 sanitize_path,
dcf77cf1 82 sanitize_url,
67dda517 83 sanitized_Request,
e5660ee6 84 std_headers,
1211bb6d 85 str_or_none,
ce02ed60 86 subtitles_filename,
ce02ed60 87 UnavailableVideoError,
29eb5174 88 url_basename,
58b1f00d 89 version_tuple,
ce02ed60
PH
90 write_json_file,
91 write_string,
1bab3437 92 YoutubeDLCookieJar,
6a3f4c3f 93 YoutubeDLCookieProcessor,
dca08720 94 YoutubeDLHandler,
fca6dba8 95 YoutubeDLRedirectHandler,
ce02ed60 96)
a0e07d31 97from .cache import Cache
e0986e31 98from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
4c54b89e 99from .extractor.openload import PhantomJSwrapper
3bc2ddcc 100from .downloader import get_suitable_downloader
4c83c967 101from .downloader.rtmp import rtmpdump_version
4f026faf 102from .postprocessor import (
f17f8651 103 FFmpegFixupM3u8PP,
62cd676c 104 FFmpegFixupM4aPP,
6271f1ca 105 FFmpegFixupStretchedPP,
4f026faf
PH
106 FFmpegMergerPP,
107 FFmpegPostProcessor,
57df9f53 108 FFmpegSubtitlesConvertorPP,
4f026faf
PH
109 get_postprocessor,
110)
dca08720 111from .version import __version__
8222d8de 112
e9c0cdd3
YCH
113if compat_os_name == 'nt':
114 import ctypes
115
2459b6e1 116
8222d8de
JMF
117class YoutubeDL(object):
118 """YoutubeDL class.
119
120 YoutubeDL objects are the ones responsible for downloading the
121 actual video file and writing it to disk if the user has requested
122 it, among some other tasks. In most cases there should be one per
123 program. As, given a video URL, the downloader doesn't know how to
124 extract all the needed information, task that InfoExtractors do, it
125 has to pass the URL to one of them.
126
127 For this, YoutubeDL objects have a method that allows
128 InfoExtractors to be registered in a given order. When it is passed
129 a URL, the YoutubeDL object hands it to the first InfoExtractor it
130 finds that reports being able to handle it. The InfoExtractor extracts
131 all the information about the video or videos the URL refers to, and
132 YoutubeDL processes the extracted information, possibly using a File
133 Downloader to download the video.
134
135 YoutubeDL objects accept a lot of parameters. In order not to saturate
136 the object constructor with arguments, it receives a dictionary of
137 options instead. These options are available through the params
138 attribute for the InfoExtractors to use. The YoutubeDL also
139 registers itself as the downloader in charge for the InfoExtractors
140 that are added to it, so this is a "mutual registration".
141
142 Available options:
143
144 username: Username for authentication purposes.
145 password: Password for authentication purposes.
180940e0 146 videopassword: Password for accessing a video.
1da50aa3
S
147 ap_mso: Adobe Pass multiple-system operator identifier.
148 ap_username: Multiple-system operator account username.
149 ap_password: Multiple-system operator account password.
8222d8de
JMF
150 usenetrc: Use netrc for authentication instead.
151 verbose: Print additional info to stdout.
152 quiet: Do not print messages to stdout.
ad8915b7 153 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
154 forceurl: Force printing final URL.
155 forcetitle: Force printing title.
156 forceid: Force printing ID.
157 forcethumbnail: Force printing thumbnail URL.
158 forcedescription: Force printing description.
159 forcefilename: Force printing final filename.
525ef922 160 forceduration: Force printing duration.
8694c600 161 forcejson: Force printing info_dict as JSON.
63e0be34
PH
162 dump_single_json: Force printing the info_dict of the whole playlist
163 (or video) as a single JSON line.
8222d8de 164 simulate: Do not download the video files.
d8600787 165 format: Video format code. See options.py for more information.
8222d8de 166 outtmpl: Template for output names.
bdc3fd2f
U
167 restrictfilenames: Do not allow "&" and spaces in file names.
168 trim_file_name: Limit length of filename (extension excluded).
8222d8de 169 ignoreerrors: Do not stop on download errors.
d22dec74 170 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
171 nooverwrites: Prevent overwriting files.
172 playliststart: Playlist item to start at.
173 playlistend: Playlist item to end at.
c14e88f0 174 playlist_items: Specific indices of playlist to download.
ff815fe6 175 playlistreverse: Download playlist items in reverse order.
75822ca7 176 playlistrandom: Download playlist items in random order.
8222d8de
JMF
177 matchtitle: Download only matching titles.
178 rejecttitle: Reject downloads for matching titles.
8bf9319e 179 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
180 logtostderr: Log messages to stderr instead of stdout.
181 writedescription: Write the video description to a .description file
182 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 183 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 184 writethumbnail: Write the thumbnail image to a file
ec82d85a 185 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 186 writesubtitles: Write the video subtitles to a file
741dd8ea 187 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 188 allsubtitles: Downloads all the subtitles of the video
0b7f3118 189 (requires writesubtitles or writeautomaticsub)
8222d8de 190 listsubtitles: Lists all available subtitles for the video
a504ced0 191 subtitlesformat: The format code for subtitles
aa6a10c4 192 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
193 keepvideo: Keep the video file after post-processing
194 daterange: A DateRange object, download only if the upload_date is in the range.
195 skip_download: Skip the actual download of the video file
c35f9e72 196 cachedir: Location of the cache files in the filesystem.
a0e07d31 197 False to disable filesystem cache.
47192f92 198 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
199 age_limit: An integer representing the user's age in years.
200 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
201 min_views: An integer representing the minimum view count the video
202 must have in order to not be skipped.
203 Videos without view count information are always
204 downloaded. None for no limit.
205 max_views: An integer representing the maximum view count.
206 Videos that are more popular than that are not
207 downloaded.
208 Videos without view count information are always
209 downloaded. None for no limit.
210 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
211 Videos already present in the file are not downloaded
212 again.
dca08720 213 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 214 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
215 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
216 At the moment, this is only supported by YouTube.
a1ee09e8 217 proxy: URL of the proxy server to use
38cce791 218 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 219 on geo-restricted sites.
e344693b 220 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
221 bidi_workaround: Work around buggy terminals without bidirectional text
222 support, using fribidi
a0ddb8a2 223 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 224 include_ads: Download ads as well
04b4d394
PH
225 default_search: Prepend this string if an input url is not valid.
226 'auto' for elaborate guessing
62fec3b2 227 encoding: Use this encoding instead of the system-specified.
e8ee972c 228 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
229 Pass in 'in_playlist' to only show this behavior for
230 playlist items.
4f026faf 231 postprocessors: A list of dictionaries, each with an entry
71b640cc 232 * key: The name of the postprocessor. See
cefecac1 233 youtube_dlc/postprocessor/__init__.py for a list.
4f026faf
PH
234 as well as any further keyword arguments for the
235 postprocessor.
71b640cc
PH
236 progress_hooks: A list of functions that get called on download
237 progress, with a dictionary with the entries
5cda4eda 238 * status: One of "downloading", "error", or "finished".
ee69b99a 239 Check this first and ignore unknown values.
71b640cc 240
5cda4eda 241 If status is one of "downloading", or "finished", the
ee69b99a
PH
242 following properties may also be present:
243 * filename: The final filename (always present)
5cda4eda 244 * tmpfilename: The filename we're currently writing to
71b640cc
PH
245 * downloaded_bytes: Bytes on disk
246 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
247 * total_bytes_estimate: Guess of the eventual file size,
248 None if unavailable.
249 * elapsed: The number of seconds since download started.
71b640cc
PH
250 * eta: The estimated time in seconds, None if unknown
251 * speed: The download speed in bytes/second, None if
252 unknown
5cda4eda
PH
253 * fragment_index: The counter of the currently
254 downloaded video fragment.
255 * fragment_count: The number of fragments (= individual
256 files that will be merged)
71b640cc
PH
257
258 Progress hooks are guaranteed to be called at least once
259 (with status "finished") if the download is successful.
45598f15 260 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
261 fixup: Automatically correct known faults of the file.
262 One of:
263 - "never": do nothing
264 - "warn": only emit a warning
265 - "detect_or_warn": check whether we can do anything
62cd676c 266 about it, warn otherwise (default)
504f20dd 267 source_address: Client-side IP address to bind to.
6ec6cb4e 268 call_home: Boolean, true iff we are allowed to contact the
cefecac1 269 youtube-dlc servers for debugging.
7aa589a5
S
270 sleep_interval: Number of seconds to sleep before each download when
271 used alone or a lower bound of a range for randomized
272 sleep before each download (minimum possible number
273 of seconds to sleep) when used along with
274 max_sleep_interval.
275 max_sleep_interval:Upper bound of a range for randomized sleep before each
276 download (maximum possible number of seconds to sleep).
277 Must only be used along with sleep_interval.
278 Actual sleep time will be a random float from range
279 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
280 listformats: Print an overview of available video formats and exit.
281 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
282 match_filter: A function that gets called with the info_dict of
283 every video.
284 If it returns a message, the video is ignored.
285 If it returns None, the video is downloaded.
286 match_filter_func in utils.py is one example for this.
7e5db8c9 287 no_color: Do not emit color codes in output.
0a840f58 288 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 289 HTTP header
0a840f58 290 geo_bypass_country:
773f291d
S
291 Two-letter ISO 3166-1 alpha-2 country code that will be used for
292 explicit geographic restriction bypassing via faking
504f20dd 293 X-Forwarded-For HTTP header
5f95927a
S
294 geo_bypass_ip_block:
295 IP range in CIDR notation that will be used similarly to
504f20dd 296 geo_bypass_country
71b640cc 297
85729c51
PH
298 The following options determine which downloader is picked:
299 external_downloader: Executable of the external downloader to call.
300 None or unset for standard (built-in) downloader.
bf09af3a
S
301 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
302 if True, otherwise use ffmpeg/avconv if False, otherwise
303 use downloader suggested by extractor if None.
fe7e0c98 304
8222d8de 305 The following parameters are not used by YoutubeDL itself, they are used by
cefecac1 306 the downloader (see youtube_dlc/downloader/common.py):
8222d8de 307 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 308 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c
S
309 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
310 http_chunk_size.
76b1bd67
JMF
311
312 The following options are used by the post processors:
d4a24f40
S
313 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
314 otherwise prefer ffmpeg.
c0b7d117
S
315 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
316 to the binary or its containing directory.
f72b0a60
S
317 postprocessor_args: A list of additional command-line arguments for the
318 postprocessor.
3836b02c 319
3600fd59
S
320 The following options are used by the Youtube extractor:
321 youtube_include_dash_manifest: If True (default), DASH manifests and related
322 data will be downloaded and processed by extractor.
323 You can reduce network I/O by disabling it if you don't
324 care about DASH.
8222d8de
JMF
325 """
326
c9969434
S
# Names of info-dict fields that hold numbers. prepare_filename() consults
# this set to patch numeric format specifiers for fields that are missing.
_NUMERIC_FIELDS = set((
    # stream properties
    'width', 'height',
    'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    # upload date
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # statistics
    'duration',
    'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # position inside the media / series / album / playlist
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
))

# Class-level placeholders; __init__ assigns the per-instance values.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None
344
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary. Its entries are copied on top
    of the built-in defaults into self.params.

    When auto_init is true, the debug header is printed and the default
    info extractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Plain truthiness test: a list index would raise TypeError for
    # logtostderr=None instead of falling back to stdout.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive():
        """Preload the archive, if any is specified"""
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        # Only announce the archive when there actually is one to load.
        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error: nothing to preload.
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    preload_download_archive()

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is set.
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv; fall back to fribidi if it cannot be started.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            # _bidi_workaround() reads the filtered text back from here.
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Copy before removing 'key' so the caller's dictionary is untouched;
        # the remaining entries become keyword arguments.
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
459
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a video ID that starts with a dash.

    Such an argument (a dash followed by exactly ten ID characters) would
    be parsed as an option, so the warning shows the command line with
    those arguments moved behind a '--' separator.
    """
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    regular_args, dashed_ids = [], []
    for arg in argv:
        if short_id_re.match(arg):
            dashed_ids.append(arg)
        else:
            regular_args.append(arg)
    if not dashed_ids:
        return

    suggested_argv = ['youtube-dlc'] + regular_args + ['--'] + dashed_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggested_argv))
475
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (a class or an instance) to the list.

    Instances are additionally indexed by their ie_key() and told that
    this object is their downloader; classes are only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 482
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If none has been
    registered yet, a new one is created, added to the extractor list
    and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    # Here get_info_extractor is the function imported from .extractor;
    # it yields the extractor class, which is then instantiated.
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
494
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every InfoExtractor class produced by gen_extractor_classes to
    the end of the list
    """
    extractor_classes = gen_extractor_classes()
    for extractor_class in extractor_classes:
        self.add_info_extractor(extractor_class)
501
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
506
933605d7
JMF
def add_progress_hook(self, ph):
    """Register a progress hook (currently only used by the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 510
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Without an _output_channel attribute the message is returned
    untouched. Otherwise the message is written to the helper's stdin and
    as many lines as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode('utf-8'))
    # Drop the newline that was appended before sending.
    return ''.join(converted)[:-len('\n')]
1c088fa8 523
def to_screen(self, message, skip_eol=False):
    """Print message to stdout, unless quiet mode is on."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
527
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' parameter."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 530
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file.

    If a 'logger' parameter is set, the message goes to its debug()
    method instead. With check_quiet set, nothing is printed when the
    'quiet' parameter is enabled. A newline is appended unless skip_eol
    is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    line_end = ('\n', '')[skip_eol]
    self._write_string(self._bidi_workaround(message) + line_end, self._screen_file)
8222d8de
JMF
541
def to_stderr(self, message):
    """Print message to stderr, or to the logger's error() if one is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
8222d8de 551
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message.

    Does nothing unless the 'consoletitle' parameter is enabled.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 562
bdde425c
PH
def save_console_title(self):
    """Push the current terminal title onto the terminal's title stack.

    Only acts when 'consoletitle' is enabled, 'simulate' is off, the OS
    is not 'nt' and a TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
571
def restore_console_title(self):
    """Pop the terminal title saved by save_console_title().

    Only acts when 'consoletitle' is enabled, 'simulate' is off, the OS
    is not 'nt' and a TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
580
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
584
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is only saved when a 'cookiefile' parameter is set.
    """
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 590
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first; in verbose mode a
    traceback follows. Then, unless the 'ignoreerrors' parameter is set,
    DownloadError is raised; otherwise the return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    # The exception being handled (if any) stays the same for the whole call.
    active = sys.exc_info()

    if self.params.get('verbose'):
        if tb is None:
            if active[0]:  # if .trouble has been called from an except block
                inner_tb = ''
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    # The active exception wraps another one: show that first.
                    inner_tb = ''.join(traceback.format_exception(*active[1].exc_info))
                tb = inner_tb + encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
620
def report_warning(self, message):
    '''
    Print the message to stderr prefixed with 'WARNING:'; the prefix is
    colored if stderr is a tty file.
    If a 'logger' parameter is set, the message goes to its warning()
    method instead; otherwise 'no_warnings' suppresses it.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
637
def report_error(self, message, tb=None):
    '''
    Hand the message to trouble() prefixed with 'ERROR:'; the prefix is
    colored in red if stderr is a tty file.
    '''
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
649
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already downloaded completely."""
    try:
        named_notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        # The name cannot be encoded for output: fall back to a generic notice.
        self.to_screen('[download] The file has already been downloaded')
8222d8de 656
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' parameter (DEFAULT_OUTTMPL when unset) with the
    fields of info_dict plus the computed 'epoch', 'autonumber' and, when
    missing, 'resolution' fields. Missing fields are rendered as 'NA'.
    The expanded name is returned after passing through sanitize_path().

    If the expansion raises ValueError, the problem is handed to
    report_error(); when that call returns, None is returned.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        def sanitize(key, value):
            return sanitize_filename(
                compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == 'id' or key.endswith('_id')))

        # Numbers stay numbers so numeric format specifiers keep working;
        # None values and containers are dropped, everything else is
        # turned into a sanitized string.
        template_dict = dict(
            (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
            for k, v in template_dict.items()
            if v is not None and not isinstance(v, (list, tuple, dict)))
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'

        def pad_field(mobj):
            # Look the width up per occurrence, so that a template using
            # both fields pads each of them with its own width.
            field = mobj.group('field')
            return '%%(%s)0%dd' % (field, field_size_compat_map[field])

        outtmpl = re.sub(FIELD_SIZE_COMPAT_RE, pad_field, outtmpl)

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string 'NA' is returned for missing fields. We will patch output
        # template for missing fields to meet string presentation type.
        # As of [1] format syntax is:
        # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            [diouxXeEfFgGcrs%]  # conversion type
        '''
        for numeric_field in self._NUMERIC_FIELDS:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(numeric_field),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing components (sub-extension and
            # extension). Any further dots stay in the stem, so nothing is
            # dropped from the name apart from the requested truncation.
            fn_groups = filename.rsplit('.', 2)
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
755
def _match_entry(self, info_dict, incomplete):
    """
    Decide whether an entry passes the user supplied download filters.

    Returns None if the file should be downloaded, otherwise a string
    explaining why the entry is skipped. The checks run in a fixed order:
    title patterns, upload date range, view count limits, age limit,
    download archive and finally (only when not incomplete) the
    'match_filter' callable.
    """
    params = self.params
    # Name used in the skip messages: title, falling back to id, then 'video'
    label = info_dict.get('title', info_dict.get('id', 'video'))

    # The title may be absent when we're just evaluating the playlist
    if 'title' in info_dict:
        title = info_dict['title']
        wanted = params.get('matchtitle', False)
        if wanted and not re.search(wanted, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + wanted + '"'
        unwanted = params.get('rejecttitle', False)
        if unwanted and re.search(unwanted, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + unwanted + '"'

    upload_date = info_dict.get('upload_date')
    if upload_date is not None:
        allowed_dates = params.get('daterange', DateRange())
        if upload_date not in allowed_dates:
            return '%s upload date is not in range %s' % (
                date_from_str(upload_date).isoformat(), allowed_dates)

    views = info_dict.get('view_count')
    if views is not None:
        lower_bound = params.get('min_views')
        if lower_bound is not None and views < lower_bound:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (
                label, views, lower_bound)
        upper_bound = params.get('max_views')
        if upper_bound is not None and views > upper_bound:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (
                label, views, upper_bound)

    if age_restricted(info_dict.get('age_limit'), params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % label

    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % label

    # The generic filter is only consulted once the info dict is complete
    if incomplete:
        return None
    match_filter = params.get('match_filter')
    if match_filter is None:
        return None
    # match_filter itself returns None to accept or a reason to reject
    return match_filter(info_dict)
fe7e0c98 797
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy every key of extra_info that info_dict lacks (existing keys win)'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
803
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Run the first suitable extractor on url and return its result.

    url -- the URL to extract
    download -- passed on to process_ie_result
    ie_key -- key of the only extractor to try; all registered
              extractors are tried in order when empty
    info_dict -- optional dict whose non-empty 'id' and 'title' replace
                 the ones of the extracted result
    extra_info -- dict with the extra values to add to each result
    process -- when false the raw extractor result is returned instead
               of the outcome of process_ie_result
    force_generic_extractor -- use the 'Generic' extractor when no
                               ie_key is given

    Returns None when no extractor is suitable or when extraction failed
    with an error that has been reported.
    '''
    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        # Only the first suitable extractor is ever used: every path
        # below leaves the function
        ie = self.get_info_extractor(candidate.ie_key())
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:
                # Finished already (backwards compatibility; listformats and friends should be moved here)
                return None
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            if info_dict:
                for field in ('id', 'title'):
                    if info_dict.get(field):
                        ie_result[field] = info_dict[field]
            self.add_default_extra_info(ie_result, ie, url)
            if not process:
                return ie_result
            return self.process_ie_result(ie_result, download, extra_info)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
            return None
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
            return None
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            return None

    # Reached only when no extractor accepted the URL
    self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 870
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields of ie_result when missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    # add_extra_info never overwrites keys the result already carries
    self.add_extra_info(ie_result, defaults)
878
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result -- dict produced by an extractor; its '_type' ('video' when
                 absent) selects the handling: 'video', 'url',
                 'url_transparent', 'playlist', 'multi_video' or
                 'compat_list'
    download -- forwarded to the calls that process the resolved results
    extra_info -- dict of values added to the results when missing; it
                  is only read here, never modified

    For 'url_transparent' results the (falsy) outcome of extract_info is
    returned as is when the inner extraction failed.
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report the unresolved reference and stop
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Every non-None outer field overrides the inner one, except the
        # fields that identify the inner result itself
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range, anything
                # else is a single index
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        # Requested playlist index of each selected entry, kept parallel
        # to `entries`. It stays None unless explicit playlist items are
        # in use. Indexing `playlistitems` directly by loop position is
        # wrong as soon as an out-of-range item has been dropped or the
        # entries have been reversed/shuffled.
        entry_indices = None

        def make_playlistitems_entries(list_ie_entries):
            num_entries = len(list_ie_entries)
            kept = [
                i for i in playlistitems
                if -num_entries <= i - 1 < num_entries]
            return [list_ie_entries[i - 1] for i in kept], kept

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries, entry_indices = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries, entry_indices = [], []
                for item in playlistitems:
                    found = list(ie_entries.getslice(item - 1, item))
                    entries.extend(found)
                    # The slice is empty when the item does not exist
                    entry_indices.extend([item] * len(found))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries, entry_indices = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
            if entry_indices is not None:
                entry_indices = entry_indices[::-1]

        if self.params.get('playlistrandom', False):
            if entry_indices is None:
                random.shuffle(entries)
            else:
                # Shuffle entries together with their indices
                paired = list(zip(entry_indices, entries))
                random.shuffle(paired)
                entry_indices = [index for index, _ in paired]
                entries = [entry for _, entry in paired]

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            if entry_indices is not None:
                playlist_index = entry_indices[i - 1]
            else:
                playlist_index = i + playliststart
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlist_index,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Entries of the old list format inherit the extractor and
            # webpage fields of the enclosing result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1074
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """
    Returns a function to filter the formats according to the filter_spec.

    filter_spec is either a numeric comparison (e.g. 'height<=720') or a
    string comparison (e.g. 'ext=mp4', 'format_id^=hls', 'vcodec!*=avc').
    A '?' after the operator makes formats lacking the field pass.
    Raises ValueError when the specification cannot be parsed.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    m = numeric_rex.search(filter_spec)

    if m:
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and
            # without an explicit trailing 'B'
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = numeric_ops[m.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': operator.contains,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        m = string_rex.search(filter_spec)
        if m:
            comparison_value = m.group('value')
            str_op = string_ops[m.group('op')]
            if m.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not m:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    field = m.group('key')
    # The matched '?' text (truthy) or None (falsy); it is what the filter
    # returns for formats that do not carry the field
    when_missing = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(field)
        if actual_value is None:
            return when_missing
        return op(actual_value, comparison_value)
    return _filter
1137
def _default_format_spec(self, info_dict, download=True):
    """
    Return the format specification used when the user gave none.

    The result is 'bestvideo+bestaudio/best', or 'best/bestvideo+bestaudio'
    when a single pre-merged file is preferable: a real download (not
    simulated) that writes to '-', is a live stream, or cannot be merged.
    """

    def merger_usable():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    def single_file_preferred():
        # Nothing is written when simulating or not downloading, so the
        # merged selection stays first
        if self.params.get('simulate', False) or not download:
            return False
        return (
            self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'
            or info_dict.get('is_live')
            or not merger_usable())

    if single_file_preferred():
        candidates = ['best', 'bestvideo+bestaudio']
    else:
        candidates = ['bestvideo+bestaudio', 'best']
    return '/'.join(candidates)
1161
67134eab
JMF
def build_format_selector(self, format_spec):
    """
    Compile a format specification string into a selector function.

    format_spec is tokenized with the Python tokenizer, parsed into a
    tree of FormatSelector tuples and turned into a function taking a
    ctx dict. The returned function reads ctx['formats'] and
    ctx['incomplete_formats'] and produces the selected format dicts.
    'best' picks index -1 and 'worst' index 0 of the formats list, so
    the list is presumably ordered from worst to best - the ordering is
    not established here.

    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        # start is a (row, column) pair; the column positions the '^'
        # marker under the offending part of the specification
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node types of the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return their text
        # joined together. Falls through (returning None) when the
        # tokens run out before a ']' is seen.
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # Note: last_line is never reassigned below, so joined tokens are
        # always emitted with a None line
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Accumulate adjacent names, numbers and unused operators
                # into one name
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser returning a list of FormatSelector
        # nodes. The inside_* flags tell a nested call which operators
        # belong to its caller: those are pushed back with
        # restore_last_token() before returning.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter without a preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a parsed node (or a list of nodes) into a function of ctx
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            # A list yields the formats of every member, in order
            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            # The first alternative that selects anything wins
            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            # Shadows the outer format_spec within this function only
            format_spec = selector.selector

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if not formats:
                    return
                if format_spec == 'all':
                    for f in formats:
                        yield f
                elif format_spec in ['best', 'worst', None]:
                    format_idx = 0 if format_spec == 'worst' else -1
                    # Formats carrying both a video and an audio stream
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) we will fallback to best/worst
                    # {video,audio}-only format
                    elif ctx['incomplete_formats']:
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # Anything else is a known file extension or a format_id;
                    # the last match in the list is the one selected
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                    else:
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_pair):
                # Combine two selected formats into a single dict whose
                # 'requested_formats' lists the parts
                format_1, format_2 = formats_pair

                # A side that is itself a merge contributes its own parts
                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                # Output extension: the 'merge_output_format' param, else
                # the sole video's, else the sole audio's when there is
                # no video at all, else 'mkv'
                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            # Every combination of the two sides is merged; each side
            # works on its own deep copy of ctx
            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the node's filters to a copy of ctx before selecting
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token so a
        # nested parser call can hand an operator back to its caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        # Python 2 iterator protocol
        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1440
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """
    Build the HTTP headers for info_dict as a new dict.

    Starts from a copy of std_headers, adds the 'http_headers' of
    info_dict, the 'Cookie' header computed by _calc_cookies and, unless
    already present, 'X-Forwarded-For' from '__x_forwarded_for_ip'.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly provided X-Forwarded-For header takes precedence
    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers
1458
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar sets on a request for info_dict['url']."""
    request = sanitized_Request(info_dict['url'])
    # The jar writes its cookies onto the throwaway request object
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
e5660ee6 1463
dd82ffea
JMF
1464 def process_video_result(self, info_dict, download=True):
1465 assert info_dict.get('_type', 'video') == 'video'
1466
bec1fad2
PH
1467 if 'id' not in info_dict:
1468 raise ExtractorError('Missing "id" field in extractor result')
1469 if 'title' not in info_dict:
1470 raise ExtractorError('Missing "title" field in extractor result')
1471
c9969434
S
1472 def report_force_conversion(field, field_not, conversion):
1473 self.report_warning(
1474 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1475 % (field, field_not, conversion))
1476
1477 def sanitize_string_field(info, string_field):
1478 field = info.get(string_field)
1479 if field is None or isinstance(field, compat_str):
1480 return
1481 report_force_conversion(string_field, 'a string', 'string')
1482 info[string_field] = compat_str(field)
1483
1484 def sanitize_numeric_fields(info):
1485 for numeric_field in self._NUMERIC_FIELDS:
1486 field = info.get(numeric_field)
1487 if field is None or isinstance(field, compat_numeric_types):
1488 continue
1489 report_force_conversion(numeric_field, 'numeric', 'int')
1490 info[numeric_field] = int_or_none(field)
1491
1492 sanitize_string_field(info_dict, 'id')
1493 sanitize_numeric_fields(info_dict)
be6217b2 1494
dd82ffea
JMF
1495 if 'playlist' not in info_dict:
1496 # It isn't part of a playlist
1497 info_dict['playlist'] = None
1498 info_dict['playlist_index'] = None
1499
d5519808 1500 thumbnails = info_dict.get('thumbnails')
cfb56d1a
PH
1501 if thumbnails is None:
1502 thumbnail = info_dict.get('thumbnail')
1503 if thumbnail:
a7a14d95 1504 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
d5519808 1505 if thumbnails:
be6d7229 1506 thumbnails.sort(key=lambda t: (
d37708fc
RA
1507 t.get('preference') if t.get('preference') is not None else -1,
1508 t.get('width') if t.get('width') is not None else -1,
1509 t.get('height') if t.get('height') is not None else -1,
1510 t.get('id') if t.get('id') is not None else '', t.get('url')))
f6c24009 1511 for i, t in enumerate(thumbnails):
dcf77cf1 1512 t['url'] = sanitize_url(t['url'])
9603e8a7 1513 if t.get('width') and t.get('height'):
d5519808 1514 t['resolution'] = '%dx%d' % (t['width'], t['height'])
f6c24009
PH
1515 if t.get('id') is None:
1516 t['id'] = '%d' % i
d5519808 1517
b7b72db9 1518 if self.params.get('list_thumbnails'):
1519 self.list_thumbnails(info_dict)
1520 return
1521
536a55da
S
1522 thumbnail = info_dict.get('thumbnail')
1523 if thumbnail:
1524 info_dict['thumbnail'] = sanitize_url(thumbnail)
1525 elif thumbnails:
d5519808
PH
1526 info_dict['thumbnail'] = thumbnails[-1]['url']
1527
c9ae7b95 1528 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1529 info_dict['display_id'] = info_dict['id']
1530
955c4514 1531 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
a55e36f4
S
1532 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1533 # see http://bugs.python.org/issue1646728)
1534 try:
1535 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1536 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1537 except (ValueError, OverflowError, OSError):
1538 pass
9d2ecdbc 1539
33d2fc2f
S
1540 # Auto generate title fields corresponding to the *_number fields when missing
1541 # in order to always have clean titles. This is very common for TV series.
1542 for field in ('chapter', 'season', 'episode'):
1543 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1544 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1545
05108a49
S
1546 for cc_kind in ('subtitles', 'automatic_captions'):
1547 cc = info_dict.get(cc_kind)
1548 if cc:
1549 for _, subtitle in cc.items():
1550 for subtitle_format in subtitle:
1551 if subtitle_format.get('url'):
1552 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1553 if subtitle_format.get('ext') is None:
1554 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1555
1556 automatic_captions = info_dict.get('automatic_captions')
4bba3716 1557 subtitles = info_dict.get('subtitles')
4bba3716 1558
a504ced0 1559 if self.params.get('listsubtitles', False):
360e1ca5 1560 if 'automatic_captions' in info_dict:
05108a49
S
1561 self.list_subtitles(
1562 info_dict['id'], automatic_captions, 'automatic captions')
4bba3716 1563 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
a504ced0 1564 return
05108a49 1565
360e1ca5 1566 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 1567 info_dict['id'], subtitles, automatic_captions)
a504ced0 1568
dd82ffea
JMF
1569 # We now pick which formats have to be downloaded
1570 if info_dict.get('formats') is None:
1571 # There's only one format available
1572 formats = [info_dict]
1573 else:
1574 formats = info_dict['formats']
1575
db95dc13
PH
1576 if not formats:
1577 raise ExtractorError('No video formats found!')
1578
73af5cc8
S
1579 def is_wellformed(f):
1580 url = f.get('url')
a5ac0c47 1581 if not url:
73af5cc8
S
1582 self.report_warning(
1583 '"url" field is missing or empty - skipping format, '
1584 'there is an error in extractor')
a5ac0c47
S
1585 return False
1586 if isinstance(url, bytes):
1587 sanitize_string_field(f, 'url')
1588 return True
73af5cc8
S
1589
1590 # Filter out malformed formats for better extraction robustness
1591 formats = list(filter(is_wellformed, formats))
1592
181c7053
S
1593 formats_dict = {}
1594
dd82ffea 1595 # We check that all the formats have the format and format_id fields
db95dc13 1596 for i, format in enumerate(formats):
c9969434
S
1597 sanitize_string_field(format, 'format_id')
1598 sanitize_numeric_fields(format)
dcf77cf1 1599 format['url'] = sanitize_url(format['url'])
e74e3b63 1600 if not format.get('format_id'):
8016c922 1601 format['format_id'] = compat_str(i)
e2effb08
S
1602 else:
1603 # Sanitize format_id from characters used in format selector expression
ec85ded8 1604 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
1605 format_id = format['format_id']
1606 if format_id not in formats_dict:
1607 formats_dict[format_id] = []
1608 formats_dict[format_id].append(format)
1609
1610 # Make sure all formats have unique format_id
1611 for format_id, ambiguous_formats in formats_dict.items():
1612 if len(ambiguous_formats) > 1:
1613 for i, format in enumerate(ambiguous_formats):
1614 format['format_id'] = '%s-%d' % (format_id, i)
1615
1616 for i, format in enumerate(formats):
8c51aa65 1617 if format.get('format') is None:
6febd1c1 1618 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
1619 id=format['format_id'],
1620 res=self.format_resolution(format),
6febd1c1 1621 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 1622 )
c1002e96 1623 # Automatically determine file extension if missing
5b1d8575 1624 if format.get('ext') is None:
cce929ea 1625 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
1626 # Automatically determine protocol if missing (useful for format
1627 # selection purposes)
6f0be937 1628 if format.get('protocol') is None:
b5559424 1629 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
1630 # Add HTTP headers, so that external programs can use them from the
1631 # json output
1632 full_format_info = info_dict.copy()
1633 full_format_info.update(format)
1634 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
1635 # Remove private housekeeping stuff
1636 if '__x_forwarded_for_ip' in info_dict:
1637 del info_dict['__x_forwarded_for_ip']
dd82ffea 1638
4bcc7bd1 1639 # TODO Central sorting goes here
99e206d5 1640
f89197d7 1641 if formats[0] is not info_dict:
b3d9ef88
JMF
1642 # only set the 'formats' fields if the original info_dict list them
1643 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 1644 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 1645 # which can't be exported to json
b3d9ef88 1646 info_dict['formats'] = formats
cfb56d1a 1647 if self.params.get('listformats'):
bfaae0a7 1648 self.list_formats(info_dict)
1649 return
1650
de3ef3ed 1651 req_format = self.params.get('format')
a9c58ad9 1652 if req_format is None:
0017d9ad
S
1653 req_format = self._default_format_spec(info_dict, download=download)
1654 if self.params.get('verbose'):
1655 self.to_stdout('[debug] Default format spec: %s' % req_format)
1656
5acfa126 1657 format_selector = self.build_format_selector(req_format)
317f7ab6
S
1658
1659 # While in format selection we may need to have an access to the original
1660 # format set in order to calculate some metrics or do some processing.
1661 # For now we need to be able to guess whether original formats provided
1662 # by extractor are incomplete or not (i.e. whether extractor provides only
1663 # video-only or audio-only formats) for proper formats selection for
1664 # extractors with such incomplete formats (see
067aa17e 1665 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
1666 # Since formats may be filtered during format selection and may not match
1667 # the original formats the results may be incorrect. Thus original formats
1668 # or pre-calculated metrics should be passed to format selection routines
1669 # as well.
1670 # We will pass a context object containing all necessary additional data
1671 # instead of just formats.
1672 # This fixes incorrect format selection issue (see
067aa17e 1673 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 1674 incomplete_formats = (
317f7ab6 1675 # All formats are video-only or
3089bc74 1676 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 1677 # all formats are audio-only
3089bc74 1678 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
1679
1680 ctx = {
1681 'formats': formats,
1682 'incomplete_formats': incomplete_formats,
1683 }
1684
1685 formats_to_download = list(format_selector(ctx))
dd82ffea 1686 if not formats_to_download:
6febd1c1 1687 raise ExtractorError('requested format not available',
78a3a9f8 1688 expected=True)
dd82ffea
JMF
1689
1690 if download:
1691 if len(formats_to_download) > 1:
6febd1c1 1692 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
1693 for format in formats_to_download:
1694 new_info = dict(info_dict)
1695 new_info.update(format)
1696 self.process_info(new_info)
1697 # We update the info dict with the best quality format (backwards compatibility)
1698 info_dict.update(formats_to_download[-1])
1699 return info_dict
1700
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id is only used in warning messages.  normal_subtitles and
    automatic_captions map a language code to a list of subtitle format
    dicts (each carrying at least an 'ext' key); either may be None.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were asked for ('writesubtitles' /
    'writeautomaticsub') or none are available.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_automatic:
        for lang, cap_info in automatic_captions.items():
            # Regular subtitles win over automatic captions of the same language
            available_subs.setdefault(lang, cap_info)

    if (not want_normal and not want_automatic) or not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # A language listed with an empty format list has nothing to pick
        # from; treat it like a missing language instead of crashing on
        # formats[-1] below.
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [candidate for candidate in formats if candidate['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions matched: fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
1749
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout every field selected through a 'force*' option.

    'title', 'id' and 'format' are looked up unconditionally when the
    info is complete (incomplete is false); with incomplete info they
    are printed only when present.  Other fields are printed only when
    they are present.
    """
    def is_forced(name):
        return self.params.get('force%s' % name, False)

    def emit(field, mandatory):
        if not is_forced(field):
            return
        present = info_dict.get(field) is not None
        if present or (mandatory and not incomplete):
            self.to_stdout(info_dict[field])

    emit('title', True)
    emit('id', True)

    if is_forced('url') and not incomplete:
        requested = info_dict.get('requested_formats')
        # For RTMP URLs, also include the playpath
        sources = requested if requested is not None else [info_dict]
        for source in sources:
            self.to_stdout(source['url'] + source.get('play_path', ''))

    emit('thumbnail', False)
    emit('description', False)

    if is_forced('filename') and filename is not None:
        self.to_stdout(filename)

    if is_forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    emit('format', True)

    if is_forced('json'):
        self.to_stdout(json.dumps(info_dict))
1779
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    For the info dict of one video this applies the 'max_downloads'
    limit and the match filter, computes the output filename, performs
    the forced printings and - unless 'simulate' is set - writes the
    requested side files (description, annotations, subtitles, info
    JSON, thumbnails via self._write_thumbnails), downloads the media
    unless 'skip_download' is set, schedules fixup postprocessors, runs
    the postprocessors and finally records the video in the download
    archive.

    Raises MaxDownloadsReached when self._num_downloads has already
    reached 'max_downloads', and UnavailableVideoError when the download
    fails with an OSError/IOError.  Most other failures are reported via
    self.report_error / self.report_warning and end processing early.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None reason means the entry is rejected; it is shown and nothing is downloaded
    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    self.__forced_printings(info_dict, filename, incomplete=False)

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    def ensure_dir_exists(path):
        # Create the parent directory of path if needed; report an error
        # and return False when that fails, True otherwise.
        try:
            dn = os.path.dirname(path)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            return True
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return False

    if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    def dl(name, info, subtitle=False):
        # Instantiate the downloader chosen for info, attach the registered
        # progress hooks and return the result of its download() call.
        fd = get_suitable_downloader(info, self.params)(self, self.params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        if self.params.get('verbose'):
            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
        return fd.download(name, info, subtitle)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # NOTE(review): ie is only referenced by the disabled code kept in the
        # string literal below; the lookup itself still runs.
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    # Subtitle content is already available inline: write it directly
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    # No inline data: fetch the subtitle through the downloader
                    try:
                        dl(sub_filename, sub_info, subtitle=True)
                        '''
                        if self.params.get('sleep_interval_subtitles', False):
                            dl(sub_filename, sub_info)
                        else:
                            sub_data = ie._request_webpage(
                                sub_info['url'], info_dict['id'], note=False).read()
                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                subfile.write(sub_data)
                        '''
                    except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed subtitle download is only a warning; go on with the next language
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('skip_download', False):
        if self.params.get('convertsubtitles', False):
            subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
            filename_real_ext = os.path.splitext(filename)[1][1:]
            # Strip the extension only when it is the media extension from info_dict
            filename_wo_ext = (
                os.path.splitext(filename)[0]
                if filename_real_ext == info_dict['ext']
                else filename)
            afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
            if subconv.available:
                info_dict.setdefault('__postprocessors', [])
                # info_dict['__postprocessors'].append(subconv)
            if os.path.exists(encodeFilename(afilename)):
                self.to_screen(
                    '[download] %s has already been downloaded and '
                    'converted' % afilename)
            else:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            if info_dict.get('requested_formats') is not None:
                # Several formats were requested: download each one to its
                # own file and leave the merge to the postprocessors.
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # Return True when all extensions of formats fit into a
                    # single set of COMPATIBLE_EXTS and there are at most two
                    # video and two audio formats.
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        # Each part gets 'f<format_id>' inserted before its extension
                        fname = prepend_extension(
                            self.prepare_filename(new_info),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            # 'and' binds tighter than 'or': this matches m3u8_native always,
            # and plain m3u8 only when 'hls_prefer_native' is set.
            if (info_dict.get('protocol') == 'm3u8_native'
                    or info_dict.get('protocol') == 'm3u8'
                    and self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            # Only reached when download and postprocessing both went through
            self.record_download_archive(info_dict)
8222d8de
JMF
2090
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one
    fixed output name.  Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        # A template without any '%' field names the same file for every URL
        fixed_name = template != '-' and '%' not in template
        if fixed_name and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url,
                force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
2115
def download_with_info_file(self, info_filename):
    """Process the video described by a previously written info JSON file.

    If processing raises DownloadError and the info carries a
    'webpage_url', the download is retried from that URL; otherwise the
    error propagates.  Returns self._download_retcode (or the result of
    the retry).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(lines)))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2132
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the 'requested_formats' and
    'requested_subtitles' entries."""
    filtered = dict(info_dict.items())
    for dropped_key in ('requested_formats', 'requested_subtitles'):
        filtered.pop(dropped_key, None)
    return filtered
2138
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed under ie_info['__postprocessors'] run
    first, followed by those in self._pps.  Files a postprocessor
    reports for deletion are removed unless 'keepvideo' is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = list(extra_pps) if extra_pps is not None else []
    chain += self._pps

    for pp in chain:
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            obsolete_files = []
        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in set(obsolete_files):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 2160
def _make_archive_id(self, info_dict):
    """Build the download-archive entry '<extractor key, lowercased> <id>'.

    Returns None when the video id is missing, or when no extractor key
    is given and none of the extractors in self._ies is suitable for
    info_dict['url'].
    """
    video_id = info_dict.get('id')
    if not video_id:
        return None
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    ie_name = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if ie_name is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return None
        # Try to find matching extractor for the URL and take its ie_key
        matching_ie = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching_ie is None:
            return None
        ie_name = matching_ie.ie_key()
    return ie_name.lower() + ' ' + video_id
5db07df6
PH
2180
def in_download_archive(self, info_dict):
    """Return whether the video is already listed in the download archive.

    Always False when no 'download_archive' is configured or when no
    archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information
c1c9a79c
PH
2191
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file and to
    self.archive.  Does nothing when no 'download_archive' is configured."""
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    entry = archive_id + '\n'
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
    self.archive.add(archive_id)
dd82ffea 2201
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution label for a format dict.

    'audio only' when vcodec is 'none'; otherwise the explicit
    'resolution' value if set, else a label built from 'width' and
    'height', else default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is None:
            return '%sp' % height
        return '%sx%s' % (width, height)
    if width is not None:
        return '%dx?' % width
    return default
2218
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-form 'note' column shown for a format dict.

    The text is assembled from the ext/language/format_note/tbr/
    container/codec/bitrate/fps/sample-rate/filesize fields that are
    present in fdict.
    """
    def add_item(text, item):
        # Comma-separate items, but emit no separator in front of the first one
        if text:
            text += ', '
        return text + item

    note = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if fdict.get('language'):
        if note:
            note += ' '
        note += '[%s] ' % fdict['language']

    format_note = fdict.get('format_note')
    if format_note is not None:
        note += format_note + ' '
    tbr = fdict.get('tbr')
    if tbr is not None:
        note += '%4dk ' % tbr
    container = fdict.get('container')
    if container is not None:
        note = add_item(note, '%s container' % container)

    vcodec = fdict.get('vcodec')
    vbr = fdict.get('vbr')
    abr = fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        note = add_item(note, vcodec)
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr

    fps = fdict.get('fps')
    if fps is not None:
        note = add_item(note, '%sfps' % fps)

    acodec = fdict.get('acodec')
    if acodec is not None:
        if acodec == 'none':
            note = add_item(note, 'video only')
        else:
            note = add_item(note, '%-5s' % acodec)
    elif abr is not None:
        note = add_item(note, 'audio')
    if abr is not None:
        note += '@%3dk' % abr

    asr = fdict.get('asr')
    if asr is not None:
        note += ' (%5dHz)' % asr

    filesize = fdict.get('filesize')
    filesize_approx = fdict.get('filesize_approx')
    if filesize is not None:
        note = add_item(note, format_bytes(filesize))
    elif filesize_approx is not None:
        note = add_item(note, '~' + format_bytes(filesize_approx))
    return note
91c7271a 2274
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'], or info_dict itself as the only format
    when that key is absent.  Formats whose 'preference' is below -1000
    are left out of the table.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # Mark the last listed row as the best one.  The table may be empty
    # when every format was filtered out above, so guard the indexing
    # to avoid an IndexError.
    if len(formats) > 1 and table:
        best_row = table[-1]
        best_row[-1] += (' ' if best_row[-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
2288
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails.

    When info_dict has no 'thumbnails' entry, or an empty one, a single
    notice is printed instead of the table.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs:
        # Header line goes out first, then the table itself.
        self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])
        rows = []
        for thumb in thumbs:
            rows.append([
                thumb['id'],
                thumb.get('width', 'unknown'),
                thumb.get('height', 'unknown'),
                thumb['url'],
            ])
        self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
    else:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
dca08720 2300
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a language -> formats table for the given subtitles mapping.

    subtitles maps a language code to a list of format dicts, each with
    an 'ext' key; the extensions are listed in reverse order of that list.
    name is the label used in the printed messages.  If subtitles is
    empty, a single "<video_id> has no <name>" line is printed instead.
    """
    if subtitles:
        self.to_screen('Available %s for %s:' % (name, video_id))
        rows = []
        for lang, formats in subtitles.items():
            extensions = [f['ext'] for f in reversed(formats)]
            rows.append([lang, ', '.join(extensions)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
a504ced0 2311
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    req may be a URL string, which is first wrapped with
    sanitized_Request, or any other object, which is handed to the opener
    unchanged.  The request is opened through self._opener using
    self._socket_timeout as the timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2317
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    In order, this reports: the encodings in use, the program version,
    whether lazy extractor loading is enabled, the git HEAD of the source
    directory (best effort), the Python version/implementation/platform,
    versions of external executables, and the proxy map of the opener.
    With the 'call_home' param set it additionally fetches and prints the
    public IP address and warns when a newer version is available.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled' + '\n')

    # Reporting the git revision is best effort: any failure (git missing,
    # not a checkout, undecodable output, ...) is deliberately ignored.
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        head = git.communicate()[0].decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        try:
            # Only exists on Python 2; the AttributeError on Python 3 is
            # swallowed just below.
            sys.exc_clear()
        except Exception:
            pass

    py_version = platform.python_version()
    py_impl = platform.python_implementation()
    if py_impl == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        py_impl += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        py_version, py_impl, platform_name()))

    versions = FFmpegPostProcessor.get_versions(self)
    versions['rtmpdump'] = rtmpdump_version()
    versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables with a truthy version value are listed.
    found = [
        '%s %s' % (exe, ver)
        for exe, ver in sorted(versions.items())
        if ver]
    self._write_string(
        '[debug] exe versions: %s\n' % (', '.join(found) or 'none'))

    proxy_map = {}
    for opener_handler in self._opener.handlers:
        if hasattr(opener_handler, 'proxies'):
            proxy_map.update(opener_handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return

    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
2393
def _setup_opener(self):
    """Build the URL opener and store it in self._opener.

    Also sets self._socket_timeout (from the 'socket_timeout' param,
    600 when unset) and self.cookiejar (file-backed when the 'cookiefile'
    param is given, in-memory otherwise).  Proxies come from the 'proxy'
    param when it is set (an empty string means no proxies), otherwise
    from compat_urllib_request.getproxies().
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        # Only load when the file is readable.
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 0
    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def _refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = _refuse_file_scheme

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2446
def encode(self, s):
    """Return s as bytes, encoding text with self.get_encoding().

    Bytes input is returned unchanged.  A UnicodeEncodeError is re-raised
    with a hint about the --encoding option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
2456
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
2462
2463 def _write_thumbnails(self, info_dict, filename):
2464 if self.params.get('writethumbnail', False):
2465 thumbnails = info_dict.get('thumbnails')
2466 if thumbnails:
2467 thumbnails = [thumbnails[-1]]
2468 elif self.params.get('write_all_thumbnails', False):
2469 thumbnails = info_dict.get('thumbnails')
2470 else:
2471 return
2472
2473 if not thumbnails:
2474 # No thumbnails present, so return immediately
2475 return
2476
2477 for t in thumbnails:
2478 thumb_ext = determine_ext(t['url'], 'jpg')
2479 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2480 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2481 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2482
2483 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2484 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2485 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2486 else:
2487 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2488 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2489 try:
2490 uf = self.urlopen(t['url'])
d3d89c32 2491 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2492 shutil.copyfileobj(uf, thumbf)
2493 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2494 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2495 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2496 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2497 (t['url'], error_to_compat_str(err)))