]> jfr.im git - yt-dlp.git/blame - youtube_dlc/YoutubeDL.py
Better Format Selection
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474
S
29from string import ascii_letters
30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
ce02ed60 54 DownloadError,
c0384f22 55 encode_compat_str,
ce02ed60 56 encodeFilename,
9b9c5355 57 error_to_compat_str,
590bc6f6 58 expand_path,
ce02ed60 59 ExtractorError,
02dbf93f 60 format_bytes,
525ef922 61 formatSeconds,
773f291d 62 GeoRestrictedError,
c9969434 63 int_or_none,
773f291d 64 ISO3166Utils,
ce02ed60 65 locked_file,
dca08720 66 make_HTTPS_handler,
ce02ed60 67 MaxDownloadsReached,
cd6fc19e 68 orderedSet,
b7ab0590 69 PagedList,
083c9df9 70 parse_filesize,
91410c9b 71 PerRequestProxyHandler,
dca08720 72 platform_name,
eedb7ba5 73 PostProcessingError,
ce02ed60 74 preferredencoding,
eedb7ba5 75 prepend_extension,
51fb4995 76 register_socks_protocols,
cfb56d1a 77 render_table,
eedb7ba5 78 replace_extension,
ce02ed60
PH
79 SameFileError,
80 sanitize_filename,
1bb5c511 81 sanitize_path,
dcf77cf1 82 sanitize_url,
67dda517 83 sanitized_Request,
e5660ee6 84 std_headers,
1211bb6d 85 str_or_none,
ce02ed60 86 subtitles_filename,
ce02ed60 87 UnavailableVideoError,
29eb5174 88 url_basename,
58b1f00d 89 version_tuple,
ce02ed60
PH
90 write_json_file,
91 write_string,
1bab3437 92 YoutubeDLCookieJar,
6a3f4c3f 93 YoutubeDLCookieProcessor,
dca08720 94 YoutubeDLHandler,
fca6dba8 95 YoutubeDLRedirectHandler,
ce02ed60 96)
a0e07d31 97from .cache import Cache
e0986e31 98from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
4c54b89e 99from .extractor.openload import PhantomJSwrapper
3bc2ddcc 100from .downloader import get_suitable_downloader
4c83c967 101from .downloader.rtmp import rtmpdump_version
4f026faf 102from .postprocessor import (
f17f8651 103 FFmpegFixupM3u8PP,
62cd676c 104 FFmpegFixupM4aPP,
6271f1ca 105 FFmpegFixupStretchedPP,
4f026faf
PH
106 FFmpegMergerPP,
107 FFmpegPostProcessor,
57df9f53 108 FFmpegSubtitlesConvertorPP,
4f026faf
PH
109 get_postprocessor,
110)
dca08720 111from .version import __version__
8222d8de 112
e9c0cdd3
YCH
113if compat_os_name == 'nt':
114 import ctypes
115
2459b6e1 116
8222d8de
JMF
117class YoutubeDL(object):
118 """YoutubeDL class.
119
120 YoutubeDL objects are the ones responsible for downloading the
121 actual video file and writing it to disk if the user has requested
122 it, among some other tasks. In most cases there should be one per
123 program. As, given a video URL, the downloader doesn't know how to
124 extract all the needed information, task that InfoExtractors do, it
125 has to pass the URL to one of them.
126
127 For this, YoutubeDL objects have a method that allows
128 InfoExtractors to be registered in a given order. When it is passed
129 a URL, the YoutubeDL object hands it to the first InfoExtractor it
130 finds that reports being able to handle it. The InfoExtractor extracts
131 all the information about the video or videos the URL refers to, and
132 YoutubeDL process the extracted information, possibly using a File
133 Downloader to download the video.
134
135 YoutubeDL objects accept a lot of parameters. In order not to saturate
136 the object constructor with arguments, it receives a dictionary of
137 options instead. These options are available through the params
138 attribute for the InfoExtractors to use. The YoutubeDL also
139 registers itself as the downloader in charge for the InfoExtractors
140 that are added to it, so this is a "mutual registration".
141
142 Available options:
143
144 username: Username for authentication purposes.
145 password: Password for authentication purposes.
180940e0 146 videopassword: Password for accessing a video.
1da50aa3
S
147 ap_mso: Adobe Pass multiple-system operator identifier.
148 ap_username: Multiple-system operator account username.
149 ap_password: Multiple-system operator account password.
8222d8de
JMF
150 usenetrc: Use netrc for authentication instead.
151 verbose: Print additional info to stdout.
152 quiet: Do not print messages to stdout.
ad8915b7 153 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
154 forceurl: Force printing final URL.
155 forcetitle: Force printing title.
156 forceid: Force printing ID.
157 forcethumbnail: Force printing thumbnail URL.
158 forcedescription: Force printing description.
159 forcefilename: Force printing final filename.
525ef922 160 forceduration: Force printing duration.
8694c600 161 forcejson: Force printing info_dict as JSON.
63e0be34
PH
162 dump_single_json: Force printing the info_dict of the whole playlist
163 (or video) as a single JSON line.
8222d8de 164 simulate: Do not download the video files.
eb8a4433 165 format: Video format code. see "FORMAT SELECTION" for more details.
166 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
167 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
909d24dd 168 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
169 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
8222d8de 170 outtmpl: Template for output names.
bdc3fd2f
U
171 restrictfilenames: Do not allow "&" and spaces in file names.
172 trim_file_name: Limit length of filename (extension excluded).
8222d8de 173 ignoreerrors: Do not stop on download errors.
d22dec74 174 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
175 nooverwrites: Prevent overwriting files.
176 playliststart: Playlist item to start at.
177 playlistend: Playlist item to end at.
c14e88f0 178 playlist_items: Specific indices of playlist to download.
ff815fe6 179 playlistreverse: Download playlist items in reverse order.
75822ca7 180 playlistrandom: Download playlist items in random order.
8222d8de
JMF
181 matchtitle: Download only matching titles.
182 rejecttitle: Reject downloads for matching titles.
8bf9319e 183 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
184 logtostderr: Log messages to stderr instead of stdout.
185 writedescription: Write the video description to a .description file
186 writeinfojson: Write the video description to a .info.json file
1fb07d10 187 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 188 writethumbnail: Write the thumbnail image to a file
ec82d85a 189 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 190 writesubtitles: Write the video subtitles to a file
741dd8ea 191 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 192 allsubtitles: Downloads all the subtitles of the video
0b7f3118 193 (requires writesubtitles or writeautomaticsub)
8222d8de 194 listsubtitles: Lists all available subtitles for the video
a504ced0 195 subtitlesformat: The format code for subtitles
aa6a10c4 196 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
197 keepvideo: Keep the video file after post-processing
198 daterange: A DateRange object, download only if the upload_date is in the range.
199 skip_download: Skip the actual download of the video file
c35f9e72 200 cachedir: Location of the cache files in the filesystem.
a0e07d31 201 False to disable filesystem cache.
47192f92 202 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
203 age_limit: An integer representing the user's age in years.
204 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
205 min_views: An integer representing the minimum view count the video
206 must have in order to not be skipped.
207 Videos without view count information are always
208 downloaded. None for no limit.
209 max_views: An integer representing the maximum view count.
210 Videos that are more popular than that are not
211 downloaded.
212 Videos without view count information are always
213 downloaded. None for no limit.
214 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
215 Videos already present in the file are not downloaded
216 again.
ea6e0c2b 217 break_on_existing: Stop the download process after attempting to download a file that's
218 in the archive.
dca08720 219 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 220 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
221 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
222 At the moment, this is only supported by YouTube.
a1ee09e8 223 proxy: URL of the proxy server to use
38cce791 224 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 225 on geo-restricted sites.
e344693b 226 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
227 bidi_workaround: Work around buggy terminals without bidirectional text
228 support, using fribidi
a0ddb8a2 229 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 230 include_ads: Download ads as well
04b4d394
PH
231 default_search: Prepend this string if an input url is not valid.
232 'auto' for elaborate guessing
62fec3b2 233 encoding: Use this encoding instead of the system-specified.
e8ee972c 234 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
235 Pass in 'in_playlist' to only show this behavior for
236 playlist items.
4f026faf 237 postprocessors: A list of dictionaries, each with an entry
71b640cc 238 * key: The name of the postprocessor. See
cefecac1 239 youtube_dlc/postprocessor/__init__.py for a list.
4f026faf
PH
240 as well as any further keyword arguments for the
241 postprocessor.
71b640cc
PH
242 progress_hooks: A list of functions that get called on download
243 progress, with a dictionary with the entries
5cda4eda 244 * status: One of "downloading", "error", or "finished".
ee69b99a 245 Check this first and ignore unknown values.
71b640cc 246
5cda4eda 247 If status is one of "downloading", or "finished", the
ee69b99a
PH
248 following properties may also be present:
249 * filename: The final filename (always present)
5cda4eda 250 * tmpfilename: The filename we're currently writing to
71b640cc
PH
251 * downloaded_bytes: Bytes on disk
252 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
253 * total_bytes_estimate: Guess of the eventual file size,
254 None if unavailable.
255 * elapsed: The number of seconds since download started.
71b640cc
PH
256 * eta: The estimated time in seconds, None if unknown
257 * speed: The download speed in bytes/second, None if
258 unknown
5cda4eda
PH
259 * fragment_index: The counter of the currently
260 downloaded video fragment.
261 * fragment_count: The number of fragments (= individual
262 files that will be merged)
71b640cc
PH
263
264 Progress hooks are guaranteed to be called at least once
265 (with status "finished") if the download is successful.
45598f15 266 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
267 fixup: Automatically correct known faults of the file.
268 One of:
269 - "never": do nothing
270 - "warn": only emit a warning
271 - "detect_or_warn": check whether we can do anything
62cd676c 272 about it, warn otherwise (default)
504f20dd 273 source_address: Client-side IP address to bind to.
6ec6cb4e 274 call_home: Boolean, true iff we are allowed to contact the
cefecac1 275 youtube-dlc servers for debugging.
7aa589a5
S
276 sleep_interval: Number of seconds to sleep before each download when
277 used alone or a lower bound of a range for randomized
278 sleep before each download (minimum possible number
279 of seconds to sleep) when used along with
280 max_sleep_interval.
281 max_sleep_interval:Upper bound of a range for randomized sleep before each
282 download (maximum possible number of seconds to sleep).
283 Must only be used along with sleep_interval.
284 Actual sleep time will be a random float from range
285 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
286 listformats: Print an overview of available video formats and exit.
287 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
288 match_filter: A function that gets called with the info_dict of
289 every video.
290 If it returns a message, the video is ignored.
291 If it returns None, the video is downloaded.
292 match_filter_func in utils.py is one example for this.
7e5db8c9 293 no_color: Do not emit color codes in output.
0a840f58 294 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 295 HTTP header
0a840f58 296 geo_bypass_country:
773f291d
S
297 Two-letter ISO 3166-2 country code that will be used for
298 explicit geographic restriction bypassing via faking
504f20dd 299 X-Forwarded-For HTTP header
5f95927a
S
300 geo_bypass_ip_block:
301 IP range in CIDR notation that will be used similarly to
504f20dd 302 geo_bypass_country
71b640cc 303
85729c51
PH
304 The following options determine which downloader is picked:
305 external_downloader: Executable of the external downloader to call.
306 None or unset for standard (built-in) downloader.
bf09af3a
S
307 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
308 if True, otherwise use ffmpeg/avconv if False, otherwise
309 use downloader suggested by extractor if None.
fe7e0c98 310
8222d8de 311 The following parameters are not used by YoutubeDL itself, they are used by
cefecac1 312 the downloader (see youtube_dlc/downloader/common.py):
8222d8de 313 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 314 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c
S
315 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
316 http_chunk_size.
76b1bd67
JMF
317
318 The following options are used by the post processors:
d4a24f40
S
319 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
320 otherwise prefer ffmpeg.
c0b7d117
S
321 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
322 to the binary or its containing directory.
f72b0a60
S
323 postprocessor_args: A list of additional command-line arguments for the
324 postprocessor.
3836b02c 325
3600fd59
S
326 The following options are used by the Youtube extractor:
327 youtube_include_dash_manifest: If True (default), DASH manifests and related
328 data will be downloaded and processed by extractor.
329 You can reduce network I/O by disabling it if you don't
330 care about DASH.
8222d8de
JMF
331 """
332
c9969434
S
# Names of info_dict fields that hold numbers.  prepare_filename consults this
# set to patch the output template when one of these fields is missing.
_NUMERIC_FIELDS = set([
    # stream properties
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    # dates
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # statistics
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # positions
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
])
343
8222d8de
JMF
344 params = None
345 _ies = []
346 _pps = []
347 _download_retcode = None
348 _num_downloads = None
349 _screen_file = None
350
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params    -- dictionary of options (see the class docstring). None is
                 treated as an empty dictionary.
    auto_init -- when true, print the debug header and register the
                 default info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Plain truth test rather than list indexing, so that a non-bool value
    # (e.g. an explicit None) selects stdout instead of raising.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive(self):
        """Preload the archive, if any is specified"""
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        # Only announce the load when there actually is an archive file
        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file simply means nothing was recorded yet
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    preload_download_archive(self)

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given as well
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started, fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Each entry is a dict holding the postprocessor name under 'key';
    # the remaining items are passed to its constructor as keyword arguments.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
465
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short ID starting with a dash.

    An argument made of a dash followed by ten ID characters is reported
    together with a corrected command line that moves all such arguments
    behind a '--' separator.
    """
    # short YouTube ID starting with dash?
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    idxs = []
    for pos, arg in enumerate(argv):
        if re.match(short_id_re, arg):
            idxs.append(pos)
    if not idxs:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in idxs]
    short_ids = [argv[pos] for pos in idxs]
    correct_argv = ['youtube-dlc'] + regular_args + ['--'] + short_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
481
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Instances are additionally indexed by their ie_key() and told which
    downloader they belong to; classes are only listed.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 488
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If none is registered
    yet, a new one is created, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
500
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add every extractor class yielded by gen_extractor_classes to the end
    of the list
    """
    extractor_classes = gen_extractor_classes()
    for ie_class in extractor_classes:
        self.add_info_extractor(ie_class)
507
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and register self as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
512
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 516
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was set up.

    Without an _output_channel attribute the message is returned unchanged.
    Otherwise it is written to the helper's stdin and as many lines as the
    message contains are read back from the output channel.
    """
    if hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        expected_lines = 1 + message.count('\n')
        helper_stdin = self._output_process.stdin
        helper_stdin.write((message + '\n').encode('utf-8'))
        helper_stdin.flush()
        chunks = []
        for _ in range(expected_lines):
            chunks.append(self._output_channel.readline().decode('utf-8'))
        res = ''.join(chunks)
        # Drop the newline that was appended before sending
        return res[:-len('\n')]
    return message
1c088fa8 529
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the quiet setting."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
533
def _write_string(self, s, out=None):
    """Write s to out using the encoding given in the 'encoding' param."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 536
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger if one is set.

    With check_quiet the message is suppressed when the 'quiet' param is
    set. skip_eol omits the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    terminator = ('\n', '')[skip_eol]
    self._write_string(message + terminator, self._screen_file)
8222d8de
JMF
547
def to_stderr(self, message):
    """Send message to the logger's error channel, or write it to the error file."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    message = self._bidi_workaround(message)
    self._write_string(message + '\n', self._err_file)
8222d8de 557
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message if the 'consoletitle' param is set."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 568
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Nothing is written unless 'consoletitle' is set, 'simulate' is not
    set, the OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
577
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Nothing is written unless 'consoletitle' is set, 'simulate' is not
    set, the OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
586
def __enter__(self):
    """Enter the context: save the console title and return the downloader."""
    self.save_console_title()
    downloader = self
    return downloader
590
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookies.

    The cookie jar is only saved when the 'cookiefile' param is set.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 596
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode a
    traceback is printed as well: tb if given, otherwise one built from
    the exception being handled or from the current stack. Unless the
    'ignoreerrors' param is set, a DownloadError is raised; otherwise
    the return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    def nested_exc_info():
        # The exc_info carried by the exception being handled, or a false value
        handled = sys.exc_info()[1]
        return hasattr(handled, 'exc_info') and handled.exc_info[0]

    if self.params.get('verbose'):
        if tb is None:
            if not sys.exc_info()[0]:
                # Not called from an except block: show the current stack
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
            else:
                tb = ''
                if nested_exc_info():
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and nested_exc_info():
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
626
def report_warning(self, message):
    '''
    Emit a warning. With a logger configured the message goes to its
    warning() method; otherwise it is printed to stderr prefixed with
    'WARNING:' (colored if stderr is a tty), unless 'no_warnings' is set.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
643
def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' (colored in red if stderr is a tty
    file) and pass it, together with tb, on to trouble.
    '''
    colorize = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
655
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The name cannot be printed; fall back to a generic message
        self.to_screen('[download] The file has already been downloaded')
8222d8de 662
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The output template (param 'outtmpl', DEFAULT_OUTTMPL if unset) is
    filled in from a sanitized copy of info_dict, extended with 'epoch',
    'autonumber' and, if missing, 'resolution'. Fields that are missing
    are rendered as 'NA'.

    Returns the result of sanitize_path on the filename, or None (after
    reporting the error) if the template substitution raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Numbers are kept as they are so that numeric presentation types
        # keep working; None values and containers are dropped.
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'

        def pad_field(mobj):
            # Look the width up per match, so that a template using both
            # fields pads each of them with its own width
            field = mobj.group('field')
            return '%%(%s)0%dd' % (field, field_size_compat_map[field])

        outtmpl = re.sub(FIELD_SIZE_COMPAT_RE, pad_field, outtmpl)

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string 'NA' is returned for missing fields. We will patch output
        # template for missing fields to meet string presentation type.
        # As of [1] format syntax is:
        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            [diouxXeEfFgGcrs%]  # conversion type
        '''
        for numeric_field in self._NUMERIC_FIELDS:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(numeric_field),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most the last two dot-separated parts (sub
            # extension and extension) so that dots inside the name itself
            # do not make parts of it disappear
            fn_groups = filename.rsplit('.', 2)
            if len(fn_groups) == 1:
                # No extension at all: only the name can be trimmed
                filename = filename[:trim_file_name]
            else:
                ext = fn_groups[-1]
                sub_ext = fn_groups[-2] if len(fn_groups) > 2 else ''
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
761
442c37b7 762 def _match_entry(self, info_dict, incomplete):
ecdec191 763 """ Returns None if the file should be downloaded """
8222d8de 764
6febd1c1 765 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
766 if 'title' in info_dict:
767 # This can happen when we're just evaluating the playlist
768 title = info_dict['title']
769 matchtitle = self.params.get('matchtitle', False)
770 if matchtitle:
771 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 772 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
773 rejecttitle = self.params.get('rejecttitle', False)
774 if rejecttitle:
775 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 776 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
d800609c 777 date = info_dict.get('upload_date')
8222d8de
JMF
778 if date is not None:
779 dateRange = self.params.get('daterange', DateRange())
780 if date not in dateRange:
6febd1c1 781 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
d800609c 782 view_count = info_dict.get('view_count')
5fe18bdb
PH
783 if view_count is not None:
784 min_views = self.params.get('min_views')
785 if min_views is not None and view_count < min_views:
6febd1c1 786 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
787 max_views = self.params.get('max_views')
788 if max_views is not None and view_count > max_views:
6febd1c1 789 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
05900629 790 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
347de493 791 return 'Skipping "%s" because it is age restricted' % video_title
c1c9a79c 792 if self.in_download_archive(info_dict):
6febd1c1 793 return '%s has already been recorded in archive' % video_title
347de493 794
442c37b7
PH
795 if not incomplete:
796 match_filter = self.params.get('match_filter')
797 if match_filter is not None:
798 ret = match_filter(info_dict)
799 if ret is not None:
800 return ret
347de493 801
8222d8de 802 return None
fe7e0c98 803
b6c45014
JMF
804 @staticmethod
805 def add_extra_info(info_dict, extra_info):
806 '''Set the keys from extra_info in info dict if they are missing'''
807 for key, value in extra_info.items():
808 info_dict.setdefault(key, value)
809
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Run the first suitable extractor on url and return its result.

    download, extra_info, process and info_dict are handed over unchanged
    to the extraction step.
    ie_key restricts the lookup to that single extractor;
    force_generic_extractor selects the 'Generic' one when no ie_key is given.
    extra_info is a dict containing the extra values to add to each result
    (the shared default dict is only read here).

    Returns None when the video id is already in the download archive or
    when no extractor is suitable (an error is reported in that case).
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        # Always work with the instance registered under the extractor's key
        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that archived videos can be
        # skipped without running the extractor
        try:
            if callable(getattr(ie, 'extract_id', None)):
                temp_id = ie.extract_id(url)
            else:
                temp_id = ie._match_id(url)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None

        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            # Leaving the loop with break skips the for-else error below
            break

        return self.__extract_info(url, ie, download, extra_info, process, info_dict)

    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
849
def __handle_extraction_exceptions(func):
    # Decorator for extraction methods: turns the errors below into
    # self.report_error() calls (the wrapper then returns None).
    # The order of the except clauses is significant and must be kept.
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            lines = [e.msg]
            if e.countries:
                lines.append('This video is available in %s.' % ', '.join(
                    ISO3166Utils.short2full(country) for country in e.countries))
            lines.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error('\n'.join(lines))
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except MaxDownloadsReached:
            # Never swallowed, must reach the caller
            raise
        except Exception as e:
            # Anything unexpected is only reported when errors are ignored
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
871
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    # Runs extractor ie on url; returns the processed result when process
    # is set, otherwise the raw (but annotated) extractor result.
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    if info_dict:
        # A non-empty id/title from the caller overrides the extracted one
        for field in ('id', 'title'):
            if info_dict.get(field):
                ie_result[field] = info_dict[field]
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 893
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    # Fill in the extractor/webpage fields of ie_result that are still missing
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
901
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    extra_info holds values that are added to the result(s) where missing;
    it is only read here, never modified.
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: the reference is reported but not resolved
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Every non-None outer field, except the ones identifying the
        # reference itself, overrides the inner result
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            force_properties.pop(f, None)
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to every index from a to b inclusive
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield item
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Requested (1-based) items that fall outside the list are dropped
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        # Endings of the skip reason that mean "already in the download
        # archive". The first one is the wording _match_entry uses; the
        # second is the wording this check used to look for (which never
        # matched, so --break-on-existing had no effect).
        archive_reason_suffixes = (
            'has already been recorded in archive',
            'has already been recorded in the archive',
        )

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                if reason.endswith(archive_reason_suffixes) and self.params.get('break_on_existing'):
                    self.to_screen('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                    break
                else:
                    self.to_screen('[download] ' + reason)
                    continue

            # Built only for the entries that are actually processed
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Entries inherit the extractor/webpage fields of the list result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1100
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    # Resolves a single playlist entry; the decorator applies the common
    # extraction error handling to it.
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1105
67134eab
JMF
1106 def _build_format_filter(self, filter_spec):
1107 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1108
1109 OPERATORS = {
1110 '<': operator.lt,
1111 '<=': operator.le,
1112 '>': operator.gt,
1113 '>=': operator.ge,
1114 '=': operator.eq,
1115 '!=': operator.ne,
1116 }
67134eab 1117 operator_rex = re.compile(r'''(?x)\s*
a03a3c80 1118 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
083c9df9
PH
1119 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1120 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1121 $
083c9df9 1122 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1123 m = operator_rex.search(filter_spec)
9ddb6925
S
1124 if m:
1125 try:
1126 comparison_value = int(m.group('value'))
1127 except ValueError:
1128 comparison_value = parse_filesize(m.group('value'))
1129 if comparison_value is None:
1130 comparison_value = parse_filesize(m.group('value') + 'B')
1131 if comparison_value is None:
1132 raise ValueError(
1133 'Invalid value %r in format specification %r' % (
67134eab 1134 m.group('value'), filter_spec))
9ddb6925
S
1135 op = OPERATORS[m.group('op')]
1136
083c9df9 1137 if not m:
9ddb6925
S
1138 STR_OPERATORS = {
1139 '=': operator.eq,
10d33b34
YCH
1140 '^=': lambda attr, value: attr.startswith(value),
1141 '$=': lambda attr, value: attr.endswith(value),
1142 '*=': lambda attr, value: value in attr,
9ddb6925 1143 }
67134eab 1144 str_operator_rex = re.compile(r'''(?x)
d5aacf9a 1145 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
2cc779f4 1146 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1147 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1148 \s*$
9ddb6925 1149 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1150 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1151 if m:
1152 comparison_value = m.group('value')
2cc779f4
S
1153 str_op = STR_OPERATORS[m.group('op')]
1154 if m.group('negation'):
e118a879 1155 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1156 else:
1157 op = str_op
083c9df9 1158
9ddb6925 1159 if not m:
67134eab 1160 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1161
1162 def _filter(f):
1163 actual_value = f.get(m.group('key'))
1164 if actual_value is None:
1165 return m.group('none_inclusive')
1166 return op(actual_value, comparison_value)
67134eab
JMF
1167 return _filter
1168
0017d9ad 1169 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1170
af0f7428
S
1171 def can_merge():
1172 merger = FFmpegMergerPP(self)
1173 return merger.available and merger.can_merge()
1174
1175 def prefer_best():
0017d9ad 1176 if self.params.get('simulate', False):
af0f7428 1177 return False
0017d9ad 1178 if not download:
0017d9ad 1179 return False
af0f7428
S
1180 if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1181 return True
0017d9ad 1182 if info_dict.get('is_live'):
af0f7428
S
1183 return True
1184 if not can_merge():
1185 return True
1186 return False
1187
1188 req_format_list = ['bestvideo+bestaudio', 'best']
1189 if prefer_best():
1190 req_format_list.reverse()
0017d9ad
S
1191 return '/'.join(req_format_list)
1192
67134eab
JMF
def build_format_selector(self, format_spec):
    """
    Compile the format specification string format_spec into a selector.

    The returned function takes a context dict (its 'formats' list and
    'incomplete_formats' flag are read here) and returns an iterable of the
    selected formats.

    Raises SyntaxError when format_spec cannot be parsed.
    """
    def syntax_error(note, start):
        # start is a (row, column) token position; the column places the caret
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
                              'video': self.params.get('allow_multiple_video_streams', True)}

    def _parse_filter(tokens):
        # Collects the raw text of a [...] filter up to the closing bracket
        filter_parts = []
        for token_type, string, start, _, _ in tokens:
            if token_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for token_type, string, start, end, line in tokens:
            if token_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield token_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for token_type, string, start, end, line in tokens:
                    yield token_type, string, start, end, line
                    if token_type == tokenize.OP and string == ']':
                        break
            elif token_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield token_type, string, start, end, line
            elif token_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; returns the list of comma separated selectors
        selectors = []
        current_selector = None
        for token_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if token_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif token_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif token_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        # A bare filter applies to 'best'
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif token_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for selected in f(ctx):
                        yield selected
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # The first alternative that yields anything wins
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            spec = selector.selector if selector.selector is not None else 'best'

            if spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f

            else:
                format_fallback = False
                format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', spec)
                if format_spec_obj is not None:
                    # worst picks the first candidate, best the last one
                    format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                    format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                    not_format_type = 'v' if format_type == 'a' else 'a'
                    format_modified = format_spec_obj.group(3) is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                if format_type and format_modified  # bv*, ba*, wv*, wa*
                                else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                if format_type  # bv, ba, wv, wa
                                else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                if not format_modified  # b, w
                                else None)  # b*, w*
                else:
                    format_idx = -1
                    filter_f = ((lambda f: f.get('ext') == spec)
                                if spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

        elif selector.type == MERGE:  # +
            def _merge(formats_pair):
                format_1, format_2 = formats_pair

                # Operands that are themselves merges contribute all their parts
                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                    # For every stream kind that may not be duplicated keep only
                    # the first format carrying it. The result is collected in a
                    # new list: removing items from formats_info while iterating
                    # over it skipped the element following each removal and
                    # could remove an unrelated format.
                    taken = {'video': False, 'audio': False}
                    kept_formats = []
                    for fmt_info in formats_info:
                        kinds = [
                            aud_vid for aud_vid in ('audio', 'video')
                            if fmt_info.get(aud_vid[0] + 'codec') != 'none']
                        if any(taken[aud_vid] and not allow_multiple_streams[aud_vid]
                               for aud_vid in kinds):
                            continue
                        for aud_vid in kinds:
                            taken[aud_vid] = True
                        kept_formats.append(fmt_info)
                    formats_info = kept_formats

                if len(formats_info) == 1:
                    return formats_info[0]

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Each side works on its own copy of the context
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # The filters narrow down a copy, the caller's context stays untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1480
e5660ee6
JMF
def _calc_headers(self, info_dict):
    # Builds the HTTP headers for info_dict: a copy of std_headers updated
    # with the entry's own 'http_headers', the cookies that apply to its URL
    # and, unless already present, an X-Forwarded-For header.
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly given X-Forwarded-For header is never overwritten
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1498
def _calc_cookies(self, info_dict):
    # Lets the cookie jar add its Cookie header to a request for the entry's
    # URL and returns that header as read back from the request
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
e5660ee6 1503
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and optionally download them.

    info_dict is modified in place: mandatory fields are checked, string and
    numeric fields are coerced, thumbnails, subtitles and formats are
    sanitized, and missing derived fields (display_id, upload_date,
    chapter/season/episode, format, ext, protocol, http_headers) are filled in.

    When download is true, process_info() is called once per selected format.

    Returns the updated info_dict, or None when one of the listing options
    ('list_thumbnails', 'listsubtitles', 'listformats') is set.

    Raises ExtractorError if 'id' or 'title' is missing, if there are no
    formats, or if the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        # Warn that an extractor returned a field of the wrong type
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce info[string_field] to compat_str in place (None is left alone)
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Coerce every field listed in self._NUMERIC_FIELDS via int_or_none in place
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        # Build a one-element thumbnails list from the single 'thumbnail' URL
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Ascending sort, so the last element is the most preferred/largest one;
        # missing values sort first (-1 / '')
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                # Fall back to the position in the sorted list
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Last entry of the sorted list
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            # Deliberately best-effort: upload_date simply stays unset
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    # Sanitize subtitle URLs and fill in missing extensions
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        # NOTE(review): this indexes 'url' unconditionally, so an entry
                        # with neither 'url' nor 'ext' would raise KeyError - confirm
                        # extractors never produce such entries
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        # A format is usable only if it has a non-empty 'url'
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Maps each format_id to the list of formats sharing it
    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    # NOTE(review): formats may be empty here if is_wellformed() rejected every
    # entry, in which case formats[0] raises IndexError - confirm this is intended
    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_stdout('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            # Each selected format is processed on its own merged copy of info_dict
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1741
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format"""
    params = self.params

    # Regular subtitles take precedence over automatic captions per language
    candidates = {}
    if normal_subtitles and params.get('writesubtitles'):
        candidates.update(normal_subtitles)
    if automatic_captions and params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            candidates.setdefault(lang, cap_info)

    nothing_requested = not (
        params.get('writesubtitles') or params.get('writeautomaticsub'))
    if nothing_requested or not candidates:
        return None

    if params.get('allsubtitles', False):
        requested_langs = candidates.keys()
    elif params.get('subtitleslangs', False):
        requested_langs = params.get('subtitleslangs')
    elif 'en' in candidates:
        requested_langs = ['en']
    else:
        # Fall back to the first available language
        requested_langs = [next(iter(candidates))]

    formats_query = params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []

    selected = {}
    for lang in requested_langs:
        formats = candidates.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matching = [fmt for fmt in formats if fmt['ext'] == ext]
            if matching:
                chosen = matching[-1]
                break
        else:
            # No preference matched: use the last listed format and say so
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        selected[lang] = chosen
    return selected
1790
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout every field whose 'force<field>' option is enabled.

    When incomplete is true, mandatory fields that are missing are skipped
    and URLs are not printed at all.
    """
    def forced(option):
        return self.params.get('force%s' % option, False)

    def print_mandatory(field):
        if not forced(field):
            return
        if incomplete and info_dict.get(field) is None:
            return
        self.to_stdout(info_dict[field])

    def print_optional(field):
        if forced(field) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    print_mandatory('title')
    print_mandatory('id')

    if forced('url') and not incomplete:
        requested_formats = info_dict.get('requested_formats')
        if requested_formats is None:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        else:
            for fmt in requested_formats:
                self.to_stdout(fmt['url'] + fmt.get('play_path', ''))

    print_optional('thumbnail')
    print_optional('description')

    if forced('filename') and filename is not None:
        self.to_stdout(filename)
    if forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    print_mandatory('format')

    if forced('json'):
        self.to_stdout(json.dumps(info_dict))
1820
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Depending on self.params this prints the forced fields, writes the
    description, annotations, subtitles, info JSON and thumbnails next to the
    output file, downloads the media (one file, or several requested formats
    to be merged), queues fixup postprocessors, runs post_process() and
    finally records the video in the download archive.

    Returns None in every case; errors are mostly reported through
    report_error()/report_warning() followed by an early return.

    Raises MaxDownloadsReached when the 'max_downloads' limit has been hit
    and UnavailableVideoError when the download fails with OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # _match_entry() returns a message when the entry must be skipped
    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    self.__forced_printings(info_dict, filename, incomplete=False)

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    def ensure_dir_exists(path):
        # Create the parent directory of path if needed; report and return
        # False on failure
        try:
            dn = os.path.dirname(path)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            return True
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return False

    if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    def dl(name, info, subtitle=False):
        # Pick a downloader for info, attach the progress hooks and run it;
        # returns whatever the downloader's download() returns
        fd = get_suitable_downloader(info, self.params)(self, self.params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        if self.params.get('verbose'):
            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
        return fd.download(name, info, subtitle)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        # ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    # Subtitle content is already in memory: write it out directly
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    # Otherwise fetch the subtitle through the regular downloader
                    try:
                        dl(sub_filename, sub_info, subtitle=True)
                        '''
                        if self.params.get('sleep_interval_subtitles', False):
                            dl(sub_filename, sub_info)
                        else:
                            sub_data = ie._request_webpage(
                                sub_info['url'], info_dict['id'], note=False).read()
                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                subfile.write(sub_data)
                        '''
                    except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed subtitle download is only a warning; try the next language
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('skip_download', False):
        if self.params.get('convertsubtitles', False):
            subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
            filename_real_ext = os.path.splitext(filename)[1][1:]
            # Strip the extension only when it is the media extension
            filename_wo_ext = (
                os.path.splitext(filename)[0]
                if filename_real_ext == info_dict['ext']
                else filename)
            afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
            if subconv.available:
                info_dict.setdefault('__postprocessors', [])
                # info_dict['__postprocessors'].append(subconv)
            if os.path.exists(encodeFilename(afilename)):
                self.to_screen(
                    '[download] %s has already been downloaded and '
                    'converted' % afilename)
            else:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            if info_dict.get('requested_formats') is not None:
                # Several formats were requested: download each one separately
                # and leave the merge to the FFmpegMergerPP postprocessor
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # True when the formats' extensions all belong to one of the
                    # COMPATIBLE_EXTS groups below
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        # Each part is saved under its own 'f<format_id>' name
                        fname = prepend_extension(
                            self.prepare_filename(new_info),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            # Each fixup below follows the same pattern: 'warn' only warns,
            # 'detect_or_warn' queues the postprocessor when it is available and
            # warns otherwise, anything else must be 'ignore' or 'never'
            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            # 'and' binds tighter than 'or': m3u8_native always qualifies, plain
            # m3u8 only together with the 'hls_prefer_native' option
            if (info_dict.get('protocol') == 'm3u8_native'
                    or info_dict.get('protocol') == 'm3u8'
                    and self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            # Only reached after a successful download and postprocessing
            self.record_download_archive(info_dict)
8222d8de
JMF
2131
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

    # Several URLs written to one fixed (non-template, non-stdout) name would
    # overwrite each other, unless only a single download is allowed anyway
    if len(url_list) > 1 and not (outtmpl == '-' or '%' in outtmpl):
        if self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
2156
def download_with_info_file(self, info_filename):
    """Process a previously saved info JSON file as if it had just been extracted.

    If processing raises DownloadError and the info has a 'webpage_url',
    that URL is downloaded instead; otherwise the error is re-raised.
    Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
    info = self.filter_requested_info(raw_info)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 2173
cb202fd2
S
2174 @staticmethod
2175 def filter_requested_info(info_dict):
2176 return dict(
2177 (k, v) for k, v in info_dict.items()
2178 if k not in ['requested_formats', 'requested_subtitles'])
2179
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename

    # Postprocessors attached to this result run before the global ones
    own_pps = ie_info.get('__postprocessors')
    pps_chain = [] if own_pps is None else list(own_pps)
    pps_chain.extend(self._pps)

    for pp in pps_chain:
        try:
            files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            # Report and carry on with the next postprocessor; nothing to delete
            self.report_error(e.msg)
            continue
        if not files_to_delete or self.params.get('keepvideo', False):
            continue
        for old_filename in set(files_to_delete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 2201
def _make_archive_id(self, info_dict):
    """Return the '<extractor> <id>' archive key for info_dict, or None if it cannot be built."""
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        matching_ie = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching_ie is None:
            return
        extractor = matching_ie.ie_key()
    return extractor.lower() + ' ' + video_id
5db07df6
PH
2221
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is already in the download archive."""
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information
c1c9a79c
PH
2232
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file and the in-memory set."""
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        # No archive configured: nothing to record
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2242
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution description for a format dict.

    Checked in this order:
      * 'audio only' when format['vcodec'] == 'none'
      * format['resolution'] when it is not None
      * 'WIDTHxHEIGHT', 'HEIGHTp' or 'WIDTHx?' depending on which of
        'width' / 'height' are present
      * `default` when neither dimension is known
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    resolution = format.get('resolution')
    if resolution is not None:
        return resolution

    width = format.get('width')
    height = format.get('height')
    if height is None:
        if width is None:
            return default
        return '%dx?' % width
    if width is None:
        return '%sp' % height
    return '%sx%s' % (width, height)
2259
c57f7757
PH
def _format_note(self, fdict):
    """Compose the free-text 'note' column for one format dict.

    Pieces are appended in a fixed order: unsupported marker (f4f/f4m),
    language, format_note, tbr, container, video codec / video bitrate,
    fps, audio codec / audio bitrate, sample rate and finally the exact
    or approximate file size. Missing fields are simply left out.
    """
    def sep(text):
        # ', ' is only inserted when something already precedes it
        return text + ', ' if text else text

    vcodec, acodec = fdict.get('vcodec'), fdict.get('acodec')
    vbr, abr = fdict.get('vbr'), fdict.get('abr')

    note = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if fdict.get('language'):
        if note:
            note += ' '
        note += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        note += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        note += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        note = sep(note) + '%s container' % fdict['container']

    if vcodec is not None and vcodec != 'none':
        note = sep(note) + vcodec
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        # No usable codec name, but both bitrates known: label the video one
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr

    if fdict.get('fps') is not None:
        note = sep(note) + '%sfps' % fdict['fps']

    if acodec is not None:
        note = sep(note)
        if acodec == 'none':
            note += 'video only'
        else:
            note += '%-5s' % acodec
    elif abr is not None:
        note = sep(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % abr
    if fdict.get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        note = sep(note) + format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        note = sep(note) + '~' + format_bytes(fdict['filesize_approx'])
    return note
91c7271a 2315
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'] when present, otherwise treats info_dict
    itself as the only format. Formats whose 'preference' is below -1000
    are left out of the table.
    """
    formats = info_dict.get('formats', [info_dict])
    table = []
    for fmt in formats:
        preference = fmt.get('preference')
        if preference is None or preference >= -1000:
            table.append([
                fmt['format_id'],
                fmt['ext'],
                self.format_resolution(fmt),
                self._format_note(fmt),
            ])
    # if len(formats) > 1:
    #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    rendered = render_table(header_line, table)
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], rendered))
cfb56d1a
PH
2329
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of info_dict's thumbnails.

    Prints a one-line notice instead when info_dict has no thumbnails.
    """
    thumbnails = info_dict.get('thumbnails')
    video_id = info_dict['id']
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % video_id)
        return

    self.to_screen('[info] Thumbnails for %s:' % video_id)
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 2341
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of subtitle languages and their formats.

    `subtitles` maps a language to a sequence of dicts with an 'ext' key;
    the extensions of each language are listed in reverse order. `name`
    is the label used in the printed messages. When `subtitles` is empty
    a one-line notice is printed instead.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = ', '.join(sub['ext'] for sub in reversed(formats))
        rows.append([lang, extensions])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2352
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, which is first wrapped with
    sanitized_Request, or any object accepted by the opener's open().
    The request is opened with the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2358
def print_debug_header(self):
    """Write the '[debug]' diagnostics header; no-op unless 'verbose' is set.

    Reports the encodings in use, the program version, the Git HEAD of the
    source directory (when it can be determined), the Python version and
    implementation, the versions of external programs and the proxy map.
    With the 'call_home' param it additionally reports the public IP
    address and warns when a newer version is available.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            out_encoding,
            self.get_encoding()),
        encoding=None)

    self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')

    # Best effort: any failure to query git is silently ignored
    try:
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        head = git.communicate()[0].decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        # sys.exc_clear() is a Python 2 API; the inner guard covers
        # interpreters that do not provide it
        try:
            sys.exc_clear()
        except Exception:
            pass

    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), implementation, platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only programs with a known (truthy) version are listed
    known = ['%s %s' % (exe, v) for exe, v in sorted(exe_versions.items()) if v]
    exe_str = ', '.join(known) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return
    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
2434
def _setup_opener(self):
    """Create self.cookiejar and self._opener from the current params.

    Reads the 'socket_timeout' (default 600 seconds), 'cookiefile',
    'proxy' and 'debug_printtraffic' params, and stores the timeout in
    self._socket_timeout.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        # Only load the jar when the cookie file is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An explicitly empty proxy option means "use no proxies"
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2487
def encode(self, s):
    """Encode `s` with the encoding returned by get_encoding().

    Byte strings are returned unchanged. A UnicodeEncodeError is re-raised
    with a hint about the --encoding option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
2497
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
2503
2504 def _write_thumbnails(self, info_dict, filename):
2505 if self.params.get('writethumbnail', False):
2506 thumbnails = info_dict.get('thumbnails')
2507 if thumbnails:
2508 thumbnails = [thumbnails[-1]]
2509 elif self.params.get('write_all_thumbnails', False):
2510 thumbnails = info_dict.get('thumbnails')
2511 else:
2512 return
2513
2514 if not thumbnails:
2515 # No thumbnails present, so return immediately
2516 return
2517
2518 for t in thumbnails:
2519 thumb_ext = determine_ext(t['url'], 'jpg')
2520 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2521 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2522 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2523
2524 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2525 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2526 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2527 else:
2528 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2529 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2530 try:
2531 uf = self.urlopen(t['url'])
d3d89c32 2532 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2533 shutil.copyfileobj(uf, thumbf)
2534 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2535 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2536 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2537 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2538 (t['url'], error_to_compat_str(err)))