]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Fixed typo
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
8694c600 10import json
62fec3b2 11import locale
8222d8de 12import os
dca08720 13import platform
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de
JMF
17import socket
18import sys
19import time
20import traceback
21
1e5b9a95
PH
22if os.name == 'nt':
23 import ctypes
24
ce02ed60 25from .utils import (
dca08720 26 compat_cookiejar,
ce02ed60 27 compat_http_client,
ce02ed60
PH
28 compat_str,
29 compat_urllib_error,
30 compat_urllib_request,
d05cfe06 31 escape_url,
ce02ed60
PH
32 ContentTooShortError,
33 date_from_str,
34 DateRange,
acd69589 35 DEFAULT_OUTTMPL,
ce02ed60
PH
36 determine_ext,
37 DownloadError,
38 encodeFilename,
39 ExtractorError,
02dbf93f 40 format_bytes,
525ef922 41 formatSeconds,
1c088fa8 42 get_term_width,
ce02ed60 43 locked_file,
dca08720 44 make_HTTPS_handler,
ce02ed60 45 MaxDownloadsReached,
b7ab0590 46 PagedList,
ce02ed60 47 PostProcessingError,
dca08720 48 platform_name,
ce02ed60
PH
49 preferredencoding,
50 SameFileError,
51 sanitize_filename,
52 subtitles_filename,
53 takewhile_inclusive,
54 UnavailableVideoError,
29eb5174 55 url_basename,
ce02ed60
PH
56 write_json_file,
57 write_string,
dca08720 58 YoutubeDLHandler,
6350728b 59 prepend_extension,
ce02ed60 60)
a0e07d31 61from .cache import Cache
023fa8c4 62from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 63from .downloader import get_suitable_downloader
56327689 64from .postprocessor import FFmpegMergerPP
dca08720 65from .version import __version__
8222d8de
JMF
66
67
68class YoutubeDL(object):
69 """YoutubeDL class.
70
71 YoutubeDL objects are the ones responsible for downloading the
72 actual video file and writing it to disk if the user has requested
73 it, among some other tasks. In most cases there should be one per
74 program. As, given a video URL, the downloader doesn't know how to
75 extract all the needed information, task that InfoExtractors do, it
76 has to pass the URL to one of them.
77
78 For this, YoutubeDL objects have a method that allows
79 InfoExtractors to be registered in a given order. When it is passed
80 a URL, the YoutubeDL object hands it to the first InfoExtractor it
81 finds that reports being able to handle it. The InfoExtractor extracts
82 all the information about the video or videos the URL refers to, and
83 YoutubeDL processes the extracted information, possibly using a File
84 Downloader to download the video.
85
86 YoutubeDL objects accept a lot of parameters. In order not to saturate
87 the object constructor with arguments, it receives a dictionary of
88 options instead. These options are available through the params
89 attribute for the InfoExtractors to use. The YoutubeDL also
90 registers itself as the downloader in charge for the InfoExtractors
91 that are added to it, so this is a "mutual registration".
92
93 Available options:
94
95 username: Username for authentication purposes.
96 password: Password for authentication purposes.
c6c19746 97 videopassword: Password for accessing a video.
8222d8de
JMF
98 usenetrc: Use netrc for authentication instead.
99 verbose: Print additional info to stdout.
100 quiet: Do not print messages to stdout.
ad8915b7 101 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
102 forceurl: Force printing final URL.
103 forcetitle: Force printing title.
104 forceid: Force printing ID.
105 forcethumbnail: Force printing thumbnail URL.
106 forcedescription: Force printing description.
107 forcefilename: Force printing final filename.
525ef922 108 forceduration: Force printing duration.
8694c600 109 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
110 simulate: Do not download the video files.
111 format: Video format code.
112 format_limit: Highest quality format to try.
113 outtmpl: Template for output names.
114 restrictfilenames: Do not allow "&" and spaces in file names
115 ignoreerrors: Do not stop on download errors.
116 nooverwrites: Prevent overwriting files.
117 playliststart: Playlist item to start at.
118 playlistend: Playlist item to end at.
119 matchtitle: Download only matching titles.
120 rejecttitle: Reject downloads for matching titles.
8bf9319e 121 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
122 logtostderr: Log messages to stderr instead of stdout.
123 writedescription: Write the video description to a .description file
124 writeinfojson: Write the video description to a .info.json file
1fb07d10 125 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
126 writethumbnail: Write the thumbnail image to a file
127 writesubtitles: Write the video subtitles to a file
b004821f 128 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 129 allsubtitles: Downloads all the subtitles of the video
0b7f3118 130 (requires writesubtitles or writeautomaticsub)
8222d8de 131 listsubtitles: Lists all available subtitles for the video
b98a6b2f 132 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 133 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
134 keepvideo: Keep the video file after post-processing
135 daterange: A DateRange object, download only if the upload_date is in the range.
136 skip_download: Skip the actual download of the video file
c35f9e72 137 cachedir: Location of the cache files in the filesystem.
a0e07d31 138 False to disable filesystem cache.
47192f92 139 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
140 age_limit: An integer representing the user's age in years.
141 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
142 min_views: An integer representing the minimum view count the video
143 must have in order to not be skipped.
144 Videos without view count information are always
145 downloaded. None for no limit.
146 max_views: An integer representing the maximum view count.
147 Videos that are more popular than that are not
148 downloaded.
149 Videos without view count information are always
150 downloaded. None for no limit.
151 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
152 Videos already present in the file are not downloaded
153 again.
dca08720 154 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 155 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
156 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
157 At the moment, this is only supported by YouTube.
a1ee09e8 158 proxy: URL of the proxy server to use
e344693b 159 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
160 bidi_workaround: Work around buggy terminals without bidirectional text
161 support, using fribidi
a0ddb8a2 162 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 163 include_ads: Download ads as well
04b4d394
PH
164 default_search: Prepend this string if an input url is not valid.
165 'auto' for elaborate guessing
62fec3b2 166 encoding: Use this encoding instead of the system-specified.
e8ee972c 167 extract_flat: Do not resolve URLs, return the immediate result.
fe7e0c98 168
8222d8de
JMF
169 The following parameters are not used by YoutubeDL itself, they are used by
170 the FileDownloader:
171 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
172 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
173
174 The following options are used by the post processors:
175 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
176 otherwise prefer avconv.
8d31fa3c 177 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
178 """
179
# Class-level placeholders; __init__ rebinds every one of these names on the
# instance, so the shared list objects below are not used by instances that
# went through __init__.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None
186
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.  The dictionary is stored (not
    copied) as self.params and may be modified here ('restrictfilenames'
    is forced on when the filesystem encoding is ASCII on Python 3).

    Raises OSError if the bidi workaround was requested and starting the
    helper process failed for a reason other than a missing executable.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # A conditional (rather than list indexing) also copes with None
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv is not usable, fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    # .get() instead of indexing: 'restrictfilenames' is optional, and an
    # options dict without it must not fail with KeyError here.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
243
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie as the last extractor and attach this object as its downloader.

    The extractor is also indexed by its ie_key() so that
    get_info_extractor can find it again.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
249
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    When no instance is registered yet, the extractor class is looked up
    by key, instantiated, registered through add_info_extractor and
    returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
261
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor produced by gen_extractors, in the order given.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
268
8222d8de
JMF
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and attach this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
273
933605d7
JMF
def add_progress_hook(self, ph):
    """Remember the progress hook ph (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 277
1c088fa8 278 def _bidi_workaround(self, message):
5d681e96 279 if not hasattr(self, '_output_channel'):
1c088fa8
PH
280 return message
281
5d681e96 282 assert hasattr(self, '_output_process')
11b85ce6 283 assert isinstance(message, compat_str)
6febd1c1
PH
284 line_count = message.count('\n') + 1
285 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 286 self._output_process.stdin.flush()
6febd1c1 287 res = ''.join(self._output_channel.readline().decode('utf-8')
1c088fa8 288 for _ in range(line_count))
6febd1c1 289 return res[:-len('\n')]
1c088fa8 290
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the quiet option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
294
def _write_string(self, s, out=None):
    """Write s to out using the encoding configured in the 'encoding' option."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
734f90bb 297
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the configured logger, or else to the screen file.

    With a truthy 'logger' option the message goes to logger.debug and
    nothing is written.  Otherwise the message is written to the screen
    file, unless check_quiet is set and the 'quiet' option is on.  A
    newline is appended unless skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    shaped = self._bidi_workaround(message)
    ending = ('\n', '')[skip_eol]
    self._write_string(shaped + ending, self._screen_file)
8222d8de
JMF
308
def to_stderr(self, message):
    """Send message to logger.error if a logger is configured, else write it to the error file."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    shaped = self._bidi_workaround(message)
    self._write_string(shaped + '\n', self._err_file)
8222d8de 318
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on.

    On Windows with a console window the title is set through kernel32;
    otherwise, when TERM is set, an escape sequence is written to the
    screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
        return
    if 'TERM' in os.environ:
        sequence = '\033]0;%s\007' % message
        self._write_string(sequence, self._screen_file)
1e5b9a95 328
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's stack (needs 'consoletitle' and TERM)."""
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
335
def restore_console_title(self):
    """Pop the console title from the terminal's stack (needs 'consoletitle' and TERM)."""
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
342
343 def __enter__(self):
344 self.save_console_title()
345 return self
346
347 def __exit__(self, *args):
348 self.restore_console_title()
f89197d7 349
dca08720
PH
350 if self.params.get('cookiefile') is not None:
351 self.cookiejar.save()
bdde425c 352
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first.  In verbose mode a
    traceback is printed as well: tb when given, otherwise one built from
    the exception being handled or, outside an except block, from the
    current call stack.

    Unless the 'ignoreerrors' option is set, DownloadError is raised with
    the message and the relevant exception info; with 'ignoreerrors' the
    download return code is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)

    # The exception being handled (if any) cannot change while this
    # method runs, so look it up once.
    current = sys.exc_info()

    if self.params.get('verbose'):
        if tb is None:
            if current[0]:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                    pieces.append(''.join(traceback.format_exception(*current[1].exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        cause = current[1].exc_info
    else:
        cause = current
    raise DownloadError(message, cause)
382
def report_warning(self, message):
    """
    Emit a warning: through logger.warning when a logger is configured,
    otherwise on stderr prefixed with 'WARNING:' (coloured when stderr is
    a tty and the OS is not Windows).  The 'no_warnings' option silences
    the stderr output.
    """
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    coloured = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;33mWARNING:\033[0m' if coloured else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
399
def report_error(self, message, tb=None):
    """
    Hand the message to trouble() prefixed with 'ERROR:' (coloured red
    when stderr is a tty and the OS is not Windows), passing tb along.
    """
    coloured = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if coloured else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
411
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded.

    If the file name triggers a UnicodeEncodeError, a generic message
    without the name is printed instead.
    """
    generic = '[download] The file has already been downloaded'
    try:
        specific = '[download] %s has already been downloaded' % file_name
        self.to_screen(specific)
    except UnicodeEncodeError:
        self.to_screen(generic)
8222d8de 418
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Build the output filename for info_dict from the 'outtmpl' template.

    The template fields are a copy of info_dict extended with 'epoch',
    'autonumber', a zero-padded 'playlist_index' and, when missing, a
    'resolution' derived from width/height.  Every non-None value is
    passed through sanitize_filename; fields that are missing or None
    expand to 'NA'.

    Returns the filename, or None after reporting an error when the
    template expansion raises ValueError.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            # Pad the index to the width of the playlist size
            pad = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad, fields['playlist_index'])

        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')
        cleaned = collections.defaultdict(lambda: 'NA')
        for key, value in fields.items():
            if value is None:
                continue
            cleaned[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == 'id'))

        template = os.path.expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        return template % cleaned
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
456
457 def _match_entry(self, info_dict):
458 """ Returns None iff the file should be downloaded """
459
6febd1c1 460 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
461 if 'title' in info_dict:
462 # This can happen when we're just evaluating the playlist
463 title = info_dict['title']
464 matchtitle = self.params.get('matchtitle', False)
465 if matchtitle:
466 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 467 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
468 rejecttitle = self.params.get('rejecttitle', False)
469 if rejecttitle:
470 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 471 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
472 date = info_dict.get('upload_date', None)
473 if date is not None:
474 dateRange = self.params.get('daterange', DateRange())
475 if date not in dateRange:
6febd1c1 476 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
477 view_count = info_dict.get('view_count', None)
478 if view_count is not None:
479 min_views = self.params.get('min_views')
480 if min_views is not None and view_count < min_views:
6febd1c1 481 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
482 max_views = self.params.get('max_views')
483 if max_views is not None and view_count > max_views:
6febd1c1 484 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
8dbe9899
PH
485 age_limit = self.params.get('age_limit')
486 if age_limit is not None:
be843678
PH
487 actual_age_limit = info_dict.get('age_limit')
488 if actual_age_limit is None:
489 actual_age_limit = 0
490 if age_limit < actual_age_limit:
6febd1c1 491 return 'Skipping "' + title + '" because it is age restricted'
c1c9a79c 492 if self.in_download_archive(info_dict):
6febd1c1 493 return '%s has already been recorded in archive' % video_title
8222d8de 494 return None
fe7e0c98 495
b6c45014
JMF
496 @staticmethod
497 def add_extra_info(info_dict, extra_info):
498 '''Set the keys from extra_info in info dict if they are missing'''
499 for key, value in extra_info.items():
500 info_dict.setdefault(key, value)
501
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    """
    Extract the information for url with the first suitable extractor.

    With ie_key only that extractor is considered, otherwise the
    registered extractors are tried in order.  When process is true the
    extractor result is passed on to process_ie_result (which downloads
    the videos if 'download') and that return value is returned;
    otherwise the raw extractor result is returned.
    extra_info is a dict containing the extra values to add to each result.

    Returns None when the extractor reports nothing, when an error was
    reported without being raised, or when no extractor is suitable.
    """
    if ie_key:
        candidates = [self.get_info_extractor(ie_key)]
    else:
        candidates = self._ies

    # Only the first extractor that claims the URL is ever tried
    chosen = next((ie for ie in candidates if ie.suitable(url)), None)
    if chosen is None:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
        return None

    if not chosen.working():
        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')

    try:
        result = chosen.extract(url)
        if result is None:
            # Finished already (backwards compatibility; listformats and
            # friends should be moved here)
            return None
        if isinstance(result, list):
            # Backwards compatibility: old IE result format
            result = {
                '_type': 'compat_list',
                'entries': result,
            }
        self.add_default_extra_info(result, chosen, url)
        if not process:
            return result
        return self.process_ie_result(result, download, extra_info)
    except ExtractorError as de:  # An error we somewhat expected
        self.report_error(compat_str(de), de.format_traceback())
        return None
    except MaxDownloadsReached:
        raise
    except Exception as e:
        if not self.params.get('ignoreerrors', False):
            raise
        self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        return None
fe7e0c98 551
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor name/key and page URL fields that ie_result lacks."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
559
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.  For 'url' and 'url_transparent'
    results this is None when extract_info returned None for the
    referenced URL.

    extra_info holds extra values added to results that lack them; it is
    only read here, never modified.

    Raises Exception for an unknown '_type'.
    """

    result_type = ie_result.get('_type', 'video')

    if self.params.get('extract_flat', False):
        # Flat extraction: hand back references without resolving them
        if result_type in ('url', 'url_transparent'):
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when the extraction failed and the
        # error was reported instead of raised; there is nothing to merge
        # in that case, so return early instead of crashing below.
        if not info:
            return info

        def make_result(embedded_info):
            # Fields listed here always come from the embedded result:
            # they are dropped from the embedding page's data and copied
            # over only when the embedded result provides them.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style list entries inherit the extractor/page fields of
            # the enclosing result when they lack them
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
680
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick the format matching format_spec from available_formats.

    available_formats is treated as ordered from worst to best.
    format_spec may be None or 'best', 'worst', one of
    'bestaudio'/'worstaudio' (formats whose vcodec is 'none'),
    'bestvideo'/'worstvideo' (formats whose acodec is 'none'), a known
    file extension, or a format_id.  When several formats match, the last
    one wins for extensions and format ids.

    Returns the chosen format dict, or None when nothing matches.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # (spec name, codec field that must be 'none', index to pick)
    codec_rules = (
        ('bestaudio', 'vcodec', -1),
        ('worstaudio', 'vcodec', 0),
        ('bestvideo', 'acodec', -1),
        ('worstvideo', 'acodec', 0),
    )
    for spec_name, codec_field, pick in codec_rules:
        if format_spec == spec_name:
            candidates = [
                f for f in available_formats
                if f.get(codec_field) == 'none']
            return candidates[pick] if candidates else None

    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
720
dd82ffea
JMF
721 def process_video_result(self, info_dict, download=True):
722 assert info_dict.get('_type', 'video') == 'video'
723
bec1fad2
PH
724 if 'id' not in info_dict:
725 raise ExtractorError('Missing "id" field in extractor result')
726 if 'title' not in info_dict:
727 raise ExtractorError('Missing "title" field in extractor result')
728
dd82ffea
JMF
729 if 'playlist' not in info_dict:
730 # It isn't part of a playlist
731 info_dict['playlist'] = None
732 info_dict['playlist_index'] = None
733
d5519808
PH
734 thumbnails = info_dict.get('thumbnails')
735 if thumbnails:
be6d7229
PH
736 thumbnails.sort(key=lambda t: (
737 t.get('width'), t.get('height'), t.get('url')))
d5519808
PH
738 for t in thumbnails:
739 if 'width' in t and 'height' in t:
740 t['resolution'] = '%dx%d' % (t['width'], t['height'])
741
742 if thumbnails and 'thumbnail' not in info_dict:
743 info_dict['thumbnail'] = thumbnails[-1]['url']
744
c9ae7b95 745 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
746 info_dict['display_id'] = info_dict['id']
747
955c4514 748 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
9d2ecdbc 749 upload_date = datetime.datetime.utcfromtimestamp(
955c4514 750 info_dict['timestamp'])
9d2ecdbc
PH
751 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
752
6ff000b8 753 # This extractors handle format selection themselves
6febd1c1 754 if info_dict['extractor'] in ['Youku']:
12893efe
JMF
755 if download:
756 self.process_info(info_dict)
6ff000b8
JMF
757 return info_dict
758
dd82ffea
JMF
759 # We now pick which formats have to be downloaded
760 if info_dict.get('formats') is None:
761 # There's only one format available
762 formats = [info_dict]
763 else:
764 formats = info_dict['formats']
765
db95dc13
PH
766 if not formats:
767 raise ExtractorError('No video formats found!')
768
dd82ffea 769 # We check that all the formats have the format and format_id fields
db95dc13 770 for i, format in enumerate(formats):
bec1fad2
PH
771 if 'url' not in format:
772 raise ExtractorError('Missing "url" key in result (index %d)' % i)
773
dd82ffea 774 if format.get('format_id') is None:
8016c922 775 format['format_id'] = compat_str(i)
8c51aa65 776 if format.get('format') is None:
6febd1c1 777 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
778 id=format['format_id'],
779 res=self.format_resolution(format),
6febd1c1 780 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 781 )
c1002e96
PH
782 # Automatically determine file extension if missing
783 if 'ext' not in format:
cce929ea 784 format['ext'] = determine_ext(format['url']).lower()
dd82ffea 785
99e206d5
JMF
786 format_limit = self.params.get('format_limit', None)
787 if format_limit:
f4d96df0
PH
788 formats = list(takewhile_inclusive(
789 lambda f: f['format_id'] != format_limit, formats
790 ))
4bcc7bd1
PH
791
792 # TODO Central sorting goes here
99e206d5 793
f89197d7 794 if formats[0] is not info_dict:
b3d9ef88
JMF
795 # only set the 'formats' fields if the original info_dict list them
796 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 797 # element in the 'formats' field in info_dict is info_dict itself,
b3d9ef88
JMF
798 # wich can't be exported to json
799 info_dict['formats'] = formats
bfaae0a7 800 if self.params.get('listformats', None):
801 self.list_formats(info_dict)
802 return
803
de3ef3ed 804 req_format = self.params.get('format')
a9c58ad9
JMF
805 if req_format is None:
806 req_format = 'best'
dd82ffea 807 formats_to_download = []
dd82ffea 808 # The -1 is for supporting YoutubeIE
a9c58ad9 809 if req_format in ('-1', 'all'):
dd82ffea
JMF
810 formats_to_download = formats
811 else:
1de33faf
PH
812 for rfstr in req_format.split(','):
813 # We can accept formats requested in the format: 34/5/best, we pick
814 # the first that is available, starting from left
815 req_formats = rfstr.split('/')
816 for rf in req_formats:
817 if re.match(r'.+?\+.+?', rf) is not None:
818 # Two formats have been requested like '137+139'
819 format_1, format_2 = rf.split('+')
820 formats_info = (self.select_format(format_1, formats),
821 self.select_format(format_2, formats))
822 if all(formats_info):
823 selected_format = {
824 'requested_formats': formats_info,
825 'format': rf,
826 'ext': formats_info[0]['ext'],
827 }
828 else:
829 selected_format = None
6350728b 830 else:
1de33faf
PH
831 selected_format = self.select_format(rf, formats)
832 if selected_format is not None:
833 formats_to_download.append(selected_format)
834 break
dd82ffea 835 if not formats_to_download:
6febd1c1 836 raise ExtractorError('requested format not available',
78a3a9f8 837 expected=True)
dd82ffea
JMF
838
839 if download:
840 if len(formats_to_download) > 1:
6febd1c1 841 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
842 for format in formats_to_download:
843 new_info = dict(info_dict)
844 new_info.update(format)
845 self.process_info(new_info)
846 # We update the info dict with the best quality format (backwards compatibility)
847 info_dict.update(formats_to_download[-1])
848 return info_dict
849
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Mutates info_dict (adds 'fulltitle', 'stitle', a default 'format' and,
    for multi-format requests, '__postprocessors' / '__files_to_merge'),
    emits the "forced" printings, writes the requested side files
    (description, annotations, subtitles, info JSON, thumbnail), downloads
    the media and finally runs the postprocessors.

    Returns None in every case; failures are reported through
    self.report_error / self.report_warning.

    Raises MaxDownloadsReached when the configured download limit has been
    hit and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around; 'title' itself is capped at 200 chars
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a None description
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    try:
                        # Encode the name like every other file access in
                        # this method (the existence check above already does)
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        # Do not leak the HTTP response
                        uf.close()
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    # Pick a downloader for this format and attach the progress hooks
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._get_executable():
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
1055
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through an
    output template without any '%' field (unless max_downloads is 1).
    Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if several_urls and '%' not in template:
        # A fixed file name can only hold a single download
        if self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for single_url in url_list:
        try:
            # extract_info also performs the actual download
            self.extract_info(single_url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
1075
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename.

    If processing raises DownloadError and the info carries a
    'webpage_url', fall back to self.download() on that URL; otherwise the
    error is re-raised. Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as json_file:
        stored_info = json.load(json_file)

    try:
        self.process_ie_result(stored_info, download=True)
    except DownloadError:
        fallback_url = stored_info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1dcc4c0c 1089
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is made of ie_info['__postprocessors'] (if present) followed
    by self._pps. Each postprocessor's run() returns a
    (keep_video_wish, new_info) pair; the returned info is handed to the
    next postprocessor so that changes (for instance a new 'filepath')
    are not lost between steps.

    If no postprocessor asked to keep the file, at least one asked to drop
    it and 'keepvideo' is not set, the original file is removed.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if new_info is not None:
                # Propagate the updated information to the next postprocessor
                info = new_info
            if keep_video_wish is not None:
                if keep_video_wish:
                    # A single "keep" wish wins over any "delete" wish
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1116
5db07df6
PH
1117 def _make_archive_id(self, info_dict):
1118 # Future-proof against any change in case
1119 # and backwards compatibility with prior versions
d31209a1 1120 extractor = info_dict.get('extractor_key')
7012b23c
PH
1121 if extractor is None:
1122 if 'id' in info_dict:
1123 extractor = info_dict.get('ie_key') # key in a playlist
1124 if extractor is None:
5db07df6 1125 return None # Incomplete video information
6febd1c1 1126 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1127
def in_download_archive(self, info_dict):
    """Return True if the video's archive id is listed in the download archive.

    Returns False when no archive is configured, when the archive id cannot
    be built, or when the archive file does not exist yet.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1146
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive, if one is configured."""
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1155
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human readable resolution string for a format dict.

    Order of precedence: 'audio only' when vcodec is 'none', an explicit
    'resolution' field, then 'WxH' / 'Hp' / 'Wx?' built from whichever of
    width and height is known, and finally the given default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Only the width is known: it goes first, the height is the unknown
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1172
c57f7757
PH
1173 def _format_note(self, fdict):
1174 res = ''
1175 if fdict.get('ext') in ['f4f', 'f4m']:
1176 res += '(unsupported) '
1177 if fdict.get('format_note') is not None:
1178 res += fdict['format_note'] + ' '
1179 if fdict.get('tbr') is not None:
1180 res += '%4dk ' % fdict['tbr']
1181 if fdict.get('container') is not None:
1182 if res:
1183 res += ', '
1184 res += '%s container' % fdict['container']
1185 if (fdict.get('vcodec') is not None and
1186 fdict.get('vcodec') != 'none'):
1187 if res:
1188 res += ', '
1189 res += fdict['vcodec']
91c7271a 1190 if fdict.get('vbr') is not None:
c57f7757
PH
1191 res += '@'
1192 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1193 res += 'video@'
1194 if fdict.get('vbr') is not None:
1195 res += '%4dk' % fdict['vbr']
1196 if fdict.get('acodec') is not None:
1197 if res:
1198 res += ', '
1199 if fdict['acodec'] == 'none':
1200 res += 'video only'
1201 else:
1202 res += '%-5s' % fdict['acodec']
1203 elif fdict.get('abr') is not None:
1204 if res:
1205 res += ', '
1206 res += 'audio'
1207 if fdict.get('abr') is not None:
1208 res += '@%3dk' % fdict['abr']
1209 if fdict.get('asr') is not None:
1210 res += ' (%5dHz)' % fdict['asr']
1211 if fdict.get('filesize') is not None:
1212 if res:
1213 res += ', '
1214 res += format_bytes(fdict['filesize'])
9732d77e
PH
1215 elif fdict.get('filesize_approx') is not None:
1216 if res:
1217 res += ', '
1218 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1219 return res
91c7271a 1220
def list_formats(self, info_dict):
    """Print a table of the available formats of info_dict to the screen.

    When info_dict has no 'formats' entry it is itself treated as the only
    format. With more than one format the first row is tagged "(worst)"
    and the last one "(best)".
    """
    def render(entry, idlen=20):
        # Id column is padded to the widest id, then ext, resolution, note
        row_template = '%-' + compat_str(idlen + 1) + 's%-10s%-12s%s'
        return row_template % (
            entry['format_id'],
            entry['ext'],
            self.format_resolution(entry),
            self._format_note(entry),
        )

    available = info_dict.get('formats', [info_dict])
    widest_id = max(len(entry['format_id']) for entry in available)
    idlen = max(len('format code'), widest_id)

    rows = [render(entry, idlen) for entry in available]
    if len(available) > 1:
        worst_gap = ' ' if self._format_note(available[0]) else ''
        rows[0] += worst_gap + '(worst)'
        best_gap = ' ' if self._format_note(available[-1]) else ''
        rows[-1] += best_gap + '(best)'

    # The header is rendered through the same helper using a fake format
    header_fields = {
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}
    header_line = render(header_fields, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
dca08720
PH
1243
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs cannot contain non-ASCII characters; however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around the aforementioned issue we replace the request's original URL with
    # the percent-encoded one
    string_types = basestring if sys.version_info < (3, 0) else compat_str
    given_as_string = isinstance(req, string_types)
    if given_as_string:
        original_url = req
    else:
        original_url = req.get_full_url()
    escaped_url = escape_url(original_url)

    # Substitute URL if any change after escaping
    if original_url != escaped_url:
        if given_as_string:
            req = escaped_url
        else:
            # Rebuild the request around the escaped URL, keeping its other attributes
            req = compat_urllib_request.Request(
                escaped_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1267
def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless 'verbose' is set.

    Reports the encodings in use, the program version, the git HEAD of the
    source checkout (best effort), the Python version/platform and the
    proxy map of the opener's handlers.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only (git may be missing or this is not a checkout).
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            # Only exists on Python 2
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1308
def _setup_opener(self):
    """Build the URL opener (stored in self._opener) from the user parameters.

    Also sets self._socket_timeout (600 when 'socket_timeout' is not given)
    and self.cookiejar.
    """
    requested_timeout = self.params.get('socket_timeout')
    if requested_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(requested_timeout)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the cookie file when it is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An explicitly empty proxy option yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    built_opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    built_opener.addheaders = []
    self._opener = built_opener
62fec3b2
PH
1349
def encode(self, s):
    """Encode s with self.get_encoding(); bytes are returned unchanged.

    On UnicodeEncodeError a hint about the encoding configuration is
    appended to the error's reason before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise

    return s  # Already encoded
1359
def get_encoding(self):
    """Return the 'encoding' parameter, or preferredencoding() when it is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured