]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[adultswim] PEP8
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
8694c600 10import json
62fec3b2 11import locale
8222d8de 12import os
dca08720 13import platform
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de
JMF
17import socket
18import sys
19import time
20import traceback
21
1e5b9a95
PH
22if os.name == 'nt':
23 import ctypes
24
8c25f81b 25from .compat import (
dca08720 26 compat_cookiejar,
4644ac55 27 compat_expanduser,
ce02ed60 28 compat_http_client,
ce02ed60
PH
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
8c25f81b
PH
32)
33from .utils import (
d05cfe06 34 escape_url,
ce02ed60
PH
35 ContentTooShortError,
36 date_from_str,
37 DateRange,
acd69589 38 DEFAULT_OUTTMPL,
ce02ed60
PH
39 determine_ext,
40 DownloadError,
41 encodeFilename,
42 ExtractorError,
02dbf93f 43 format_bytes,
525ef922 44 formatSeconds,
1c088fa8 45 get_term_width,
ce02ed60 46 locked_file,
dca08720 47 make_HTTPS_handler,
ce02ed60 48 MaxDownloadsReached,
b7ab0590 49 PagedList,
ce02ed60 50 PostProcessingError,
dca08720 51 platform_name,
ce02ed60
PH
52 preferredencoding,
53 SameFileError,
54 sanitize_filename,
55 subtitles_filename,
56 takewhile_inclusive,
57 UnavailableVideoError,
29eb5174 58 url_basename,
ce02ed60
PH
59 write_json_file,
60 write_string,
dca08720 61 YoutubeDLHandler,
6350728b 62 prepend_extension,
7d4111ed 63 args_to_str,
ce02ed60 64)
a0e07d31 65from .cache import Cache
023fa8c4 66from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 67from .downloader import get_suitable_downloader
4c83c967 68from .downloader.rtmp import rtmpdump_version
d28b5171 69from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
dca08720 70from .version import __version__
8222d8de
JMF
71
72
73class YoutubeDL(object):
74 """YoutubeDL class.
75
76 YoutubeDL objects are the ones responsible for downloading the
77 actual video file and writing it to disk if the user has requested
78 it, among some other tasks. In most cases there should be one per
79 program. As, given a video URL, the downloader doesn't know how to
80 extract all the needed information, task that InfoExtractors do, it
81 has to pass the URL to one of them.
82
83 For this, YoutubeDL objects have a method that allows
84 InfoExtractors to be registered in a given order. When it is passed
85 a URL, the YoutubeDL object handles it to the first InfoExtractor it
86 finds that reports being able to handle it. The InfoExtractor extracts
87 all the information about the video or videos the URL refers to, and
88 YoutubeDL process the extracted information, possibly using a File
89 Downloader to download the video.
90
91 YoutubeDL objects accept a lot of parameters. In order not to saturate
92 the object constructor with arguments, it receives a dictionary of
93 options instead. These options are available through the params
94 attribute for the InfoExtractors to use. The YoutubeDL also
95 registers itself as the downloader in charge for the InfoExtractors
96 that are added to it, so this is a "mutual registration".
97
98 Available options:
99
100 username: Username for authentication purposes.
101 password: Password for authentication purposes.
c6c19746 102 videopassword: Password for accessing a video.
8222d8de
JMF
103 usenetrc: Use netrc for authentication instead.
104 verbose: Print additional info to stdout.
105 quiet: Do not print messages to stdout.
ad8915b7 106 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
107 forceurl: Force printing final URL.
108 forcetitle: Force printing title.
109 forceid: Force printing ID.
110 forcethumbnail: Force printing thumbnail URL.
111 forcedescription: Force printing description.
112 forcefilename: Force printing final filename.
525ef922 113 forceduration: Force printing duration.
8694c600 114 forcejson: Force printing info_dict as JSON.
63e0be34
PH
115 dump_single_json: Force printing the info_dict of the whole playlist
116 (or video) as a single JSON line.
8222d8de
JMF
117 simulate: Do not download the video files.
118 format: Video format code.
119 format_limit: Highest quality format to try.
120 outtmpl: Template for output names.
121 restrictfilenames: Do not allow "&" and spaces in file names
122 ignoreerrors: Do not stop on download errors.
123 nooverwrites: Prevent overwriting files.
124 playliststart: Playlist item to start at.
125 playlistend: Playlist item to end at.
126 matchtitle: Download only matching titles.
127 rejecttitle: Reject downloads for matching titles.
8bf9319e 128 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
129 logtostderr: Log messages to stderr instead of stdout.
130 writedescription: Write the video description to a .description file
131 writeinfojson: Write the video description to a .info.json file
1fb07d10 132 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
133 writethumbnail: Write the thumbnail image to a file
134 writesubtitles: Write the video subtitles to a file
b004821f 135 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 136 allsubtitles: Downloads all the subtitles of the video
0b7f3118 137 (requires writesubtitles or writeautomaticsub)
8222d8de 138 listsubtitles: Lists all available subtitles for the video
b98a6b2f 139 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 140 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
141 keepvideo: Keep the video file after post-processing
142 daterange: A DateRange object, download only if the upload_date is in the range.
143 skip_download: Skip the actual download of the video file
c35f9e72 144 cachedir: Location of the cache files in the filesystem.
a0e07d31 145 False to disable filesystem cache.
47192f92 146 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
147 age_limit: An integer representing the user's age in years.
148 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
149 min_views: An integer representing the minimum view count the video
150 must have in order to not be skipped.
151 Videos without view count information are always
152 downloaded. None for no limit.
153 max_views: An integer representing the maximum view count.
154 Videos that are more popular than that are not
155 downloaded.
156 Videos without view count information are always
157 downloaded. None for no limit.
158 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
159 Videos already present in the file are not downloaded
160 again.
dca08720 161 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 162 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
163 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
164 At the moment, this is only supported by YouTube.
a1ee09e8 165 proxy: URL of the proxy server to use
e344693b 166 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
167 bidi_workaround: Work around buggy terminals without bidirectional text
168 support, using fribidi
a0ddb8a2 169 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 170 include_ads: Download ads as well
04b4d394
PH
171 default_search: Prepend this string if an input url is not valid.
172 'auto' for elaborate guessing
62fec3b2 173 encoding: Use this encoding instead of the system-specified.
e8ee972c 174 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
175 Pass in 'in_playlist' to only show this behavior for
176 playlist items.
fe7e0c98 177
8222d8de
JMF
178 The following parameters are not used by YoutubeDL itself, they are used by
179 the FileDownloader:
180 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
181 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
182
183 The following options are used by the post processors:
184 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
185 otherwise prefer avconv.
8d31fa3c 186 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
187 """
188
189 params = None
190 _ies = []
191 _pps = []
192 _download_retcode = None
193 _num_downloads = None
194 _screen_file = None
195
3511266b 196 def __init__(self, params=None, auto_init=True):
8222d8de 197 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
198 if params is None:
199 params = {}
8222d8de 200 self._ies = []
56c73665 201 self._ies_instances = {}
8222d8de 202 self._pps = []
933605d7 203 self._progress_hooks = []
8222d8de
JMF
204 self._download_retcode = 0
205 self._num_downloads = 0
206 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 207 self._err_file = sys.stderr
e9f9a10f 208 self.params = params
a0e07d31 209 self.cache = Cache(self)
34308b30 210
0783b09b 211 if params.get('bidi_workaround', False):
1c088fa8
PH
212 try:
213 import pty
214 master, slave = pty.openpty()
215 width = get_term_width()
216 if width is None:
217 width_args = []
218 else:
219 width_args = ['-w', str(width)]
5d681e96 220 sp_kwargs = dict(
1c088fa8
PH
221 stdin=subprocess.PIPE,
222 stdout=slave,
223 stderr=self._err_file)
5d681e96
PH
224 try:
225 self._output_process = subprocess.Popen(
226 ['bidiv'] + width_args, **sp_kwargs
227 )
228 except OSError:
5d681e96
PH
229 self._output_process = subprocess.Popen(
230 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
231 self._output_channel = os.fdopen(master, 'rb')
1c088fa8
PH
232 except OSError as ose:
233 if ose.errno == 2:
6febd1c1 234 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
235 else:
236 raise
0783b09b 237
34308b30
PH
238 if (sys.version_info >= (3,) and sys.platform != 'win32' and
239 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
53d9009b 240 and not params.get('restrictfilenames', False)):
34308b30
PH
241 # On Python 3, the Unicode filesystem API will throw errors (#1474)
242 self.report_warning(
6febd1c1 243 'Assuming --restrict-filenames since file system encoding '
1b725173 244 'cannot encode all characters. '
6febd1c1 245 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 246 self.params['restrictfilenames'] = True
34308b30 247
a3927cf7 248 if '%(stitle)s' in self.params.get('outtmpl', ''):
6febd1c1 249 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
8222d8de 250
dca08720
PH
251 self._setup_opener()
252
3511266b
PH
253 if auto_init:
254 self.print_debug_header()
255 self.add_default_info_extractors()
256
7d4111ed
PH
257 def warn_if_short_id(self, argv):
258 # short YouTube ID starting with dash?
259 idxs = [
260 i for i, a in enumerate(argv)
261 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
262 if idxs:
263 correct_argv = (
264 ['youtube-dl'] +
265 [a for i, a in enumerate(argv) if i not in idxs] +
266 ['--'] + [argv[i] for i in idxs]
267 )
268 self.report_warning(
269 'Long argument string detected. '
270 'Use -- to separate parameters and URLs, like this:\n%s\n' %
271 args_to_str(correct_argv))
272
8222d8de
JMF
273 def add_info_extractor(self, ie):
274 """Add an InfoExtractor object to the end of the list."""
275 self._ies.append(ie)
56c73665 276 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
277 ie.set_downloader(self)
278
56c73665
JMF
279 def get_info_extractor(self, ie_key):
280 """
281 Get an instance of an IE with name ie_key, it will try to get one from
282 the _ies list, if there's no instance it will create a new one and add
283 it to the extractor list.
284 """
285 ie = self._ies_instances.get(ie_key)
286 if ie is None:
287 ie = get_info_extractor(ie_key)()
288 self.add_info_extractor(ie)
289 return ie
290
023fa8c4
JMF
291 def add_default_info_extractors(self):
292 """
293 Add the InfoExtractors returned by gen_extractors to the end of the list
294 """
295 for ie in gen_extractors():
296 self.add_info_extractor(ie)
297
8222d8de
JMF
298 def add_post_processor(self, pp):
299 """Add a PostProcessor object to the end of the chain."""
300 self._pps.append(pp)
301 pp.set_downloader(self)
302
933605d7
JMF
303 def add_progress_hook(self, ph):
304 """Add the progress hook (currently only for the file downloader)"""
305 self._progress_hooks.append(ph)
8ab470f1 306
1c088fa8 307 def _bidi_workaround(self, message):
5d681e96 308 if not hasattr(self, '_output_channel'):
1c088fa8
PH
309 return message
310
5d681e96 311 assert hasattr(self, '_output_process')
11b85ce6 312 assert isinstance(message, compat_str)
6febd1c1
PH
313 line_count = message.count('\n') + 1
314 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 315 self._output_process.stdin.flush()
6febd1c1 316 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 317 for _ in range(line_count))
6febd1c1 318 return res[:-len('\n')]
1c088fa8 319
8222d8de 320 def to_screen(self, message, skip_eol=False):
0783b09b
PH
321 """Print message to stdout if not in quiet mode."""
322 return self.to_stdout(message, skip_eol, check_quiet=True)
323
734f90bb 324 def _write_string(self, s, out=None):
b58ddb32 325 write_string(s, out=out, encoding=self.params.get('encoding'))
734f90bb 326
0783b09b 327 def to_stdout(self, message, skip_eol=False, check_quiet=False):
8222d8de 328 """Print message to stdout if not in quiet mode."""
8bf9319e 329 if self.params.get('logger'):
43afe285 330 self.params['logger'].debug(message)
0783b09b 331 elif not check_quiet or not self.params.get('quiet', False):
1c088fa8 332 message = self._bidi_workaround(message)
6febd1c1 333 terminator = ['\n', ''][skip_eol]
8222d8de 334 output = message + terminator
1c088fa8 335
734f90bb 336 self._write_string(output, self._screen_file)
8222d8de
JMF
337
338 def to_stderr(self, message):
339 """Print message to stderr."""
11b85ce6 340 assert isinstance(message, compat_str)
8bf9319e 341 if self.params.get('logger'):
43afe285
IB
342 self.params['logger'].error(message)
343 else:
1c088fa8 344 message = self._bidi_workaround(message)
6febd1c1 345 output = message + '\n'
734f90bb 346 self._write_string(output, self._err_file)
8222d8de 347
1e5b9a95
PH
348 def to_console_title(self, message):
349 if not self.params.get('consoletitle', False):
350 return
351 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
352 # c_wchar_p() might not be necessary if `message` is
353 # already of type unicode()
354 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
355 elif 'TERM' in os.environ:
734f90bb 356 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 357
bdde425c
PH
358 def save_console_title(self):
359 if not self.params.get('consoletitle', False):
360 return
361 if 'TERM' in os.environ:
efd6c574 362 # Save the title on stack
734f90bb 363 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
364
365 def restore_console_title(self):
366 if not self.params.get('consoletitle', False):
367 return
368 if 'TERM' in os.environ:
efd6c574 369 # Restore the title from stack
734f90bb 370 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
371
372 def __enter__(self):
373 self.save_console_title()
374 return self
375
376 def __exit__(self, *args):
377 self.restore_console_title()
f89197d7 378
dca08720
PH
379 if self.params.get('cookiefile') is not None:
380 self.cookiejar.save()
bdde425c 381
8222d8de
JMF
382 def trouble(self, message=None, tb=None):
383 """Determine action to take when a download problem appears.
384
385 Depending on if the downloader has been configured to ignore
386 download errors or not, this method may throw an exception or
387 not when errors are found, after printing the message.
388
389 tb, if given, is additional traceback information.
390 """
391 if message is not None:
392 self.to_stderr(message)
393 if self.params.get('verbose'):
394 if tb is None:
395 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 396 tb = ''
8222d8de 397 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 398 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
8222d8de
JMF
399 tb += compat_str(traceback.format_exc())
400 else:
401 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 402 tb = ''.join(tb_data)
8222d8de
JMF
403 self.to_stderr(tb)
404 if not self.params.get('ignoreerrors', False):
405 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
406 exc_info = sys.exc_info()[1].exc_info
407 else:
408 exc_info = sys.exc_info()
409 raise DownloadError(message, exc_info)
410 self._download_retcode = 1
411
412 def report_warning(self, message):
413 '''
414 Print the message to stderr, it will be prefixed with 'WARNING:'
415 If stderr is a tty file the 'WARNING:' will be colored
416 '''
6d07ce01
JMF
417 if self.params.get('logger') is not None:
418 self.params['logger'].warning(message)
8222d8de 419 else:
ad8915b7
PH
420 if self.params.get('no_warnings'):
421 return
6d07ce01
JMF
422 if self._err_file.isatty() and os.name != 'nt':
423 _msg_header = '\033[0;33mWARNING:\033[0m'
424 else:
425 _msg_header = 'WARNING:'
426 warning_message = '%s %s' % (_msg_header, message)
427 self.to_stderr(warning_message)
8222d8de
JMF
428
429 def report_error(self, message, tb=None):
430 '''
431 Do the same as trouble, but prefixes the message with 'ERROR:', colored
432 in red if stderr is a tty file.
433 '''
0783b09b 434 if self._err_file.isatty() and os.name != 'nt':
6febd1c1 435 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 436 else:
6febd1c1
PH
437 _msg_header = 'ERROR:'
438 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
439 self.trouble(error_message, tb)
440
8222d8de
JMF
441 def report_file_already_downloaded(self, file_name):
442 """Report file has already been fully downloaded."""
443 try:
6febd1c1 444 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 445 except UnicodeEncodeError:
6febd1c1 446 self.to_screen('[download] The file has already been downloaded')
8222d8de 447
8222d8de
JMF
448 def prepare_filename(self, info_dict):
449 """Generate the output filename."""
450 try:
451 template_dict = dict(info_dict)
452
453 template_dict['epoch'] = int(time.time())
454 autonumber_size = self.params.get('autonumber_size')
455 if autonumber_size is None:
456 autonumber_size = 5
6febd1c1 457 autonumber_templ = '%0' + str(autonumber_size) + 'd'
8222d8de 458 template_dict['autonumber'] = autonumber_templ % self._num_downloads
702665c0 459 if template_dict.get('playlist_index') is not None:
c6b4132a 460 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
17b75c0d
PH
461 if template_dict.get('resolution') is None:
462 if template_dict.get('width') and template_dict.get('height'):
463 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
464 elif template_dict.get('height'):
805ef3c6 465 template_dict['resolution'] = '%sp' % template_dict['height']
17b75c0d 466 elif template_dict.get('width'):
805ef3c6 467 template_dict['resolution'] = '?x%d' % template_dict['width']
8222d8de 468
586a91b6 469 sanitize = lambda k, v: sanitize_filename(
45598aab 470 compat_str(v),
8222d8de 471 restricted=self.params.get('restrictfilenames'),
6febd1c1 472 is_id=(k == 'id'))
586a91b6 473 template_dict = dict((k, sanitize(k, v))
45598aab
PH
474 for k, v in template_dict.items()
475 if v is not None)
6febd1c1 476 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
8222d8de 477
acd69589 478 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
4644ac55 479 tmpl = compat_expanduser(outtmpl)
586a91b6 480 filename = tmpl % template_dict
8222d8de 481 return filename
8222d8de 482 except ValueError as err:
6febd1c1 483 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
484 return None
485
486 def _match_entry(self, info_dict):
487 """ Returns None iff the file should be downloaded """
488
6febd1c1 489 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
490 if 'title' in info_dict:
491 # This can happen when we're just evaluating the playlist
492 title = info_dict['title']
493 matchtitle = self.params.get('matchtitle', False)
494 if matchtitle:
495 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 496 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
497 rejecttitle = self.params.get('rejecttitle', False)
498 if rejecttitle:
499 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 500 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
501 date = info_dict.get('upload_date', None)
502 if date is not None:
503 dateRange = self.params.get('daterange', DateRange())
504 if date not in dateRange:
6febd1c1 505 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
506 view_count = info_dict.get('view_count', None)
507 if view_count is not None:
508 min_views = self.params.get('min_views')
509 if min_views is not None and view_count < min_views:
6febd1c1 510 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
511 max_views = self.params.get('max_views')
512 if max_views is not None and view_count > max_views:
6febd1c1 513 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
8dbe9899
PH
514 age_limit = self.params.get('age_limit')
515 if age_limit is not None:
be843678
PH
516 actual_age_limit = info_dict.get('age_limit')
517 if actual_age_limit is None:
518 actual_age_limit = 0
519 if age_limit < actual_age_limit:
6febd1c1 520 return 'Skipping "' + title + '" because it is age restricted'
c1c9a79c 521 if self.in_download_archive(info_dict):
6febd1c1 522 return '%s has already been recorded in archive' % video_title
8222d8de 523 return None
fe7e0c98 524
b6c45014
JMF
525 @staticmethod
526 def add_extra_info(info_dict, extra_info):
527 '''Set the keys from extra_info in info dict if they are missing'''
528 for key, value in extra_info.items():
529 info_dict.setdefault(key, value)
530
7fc3fa05
PH
531 def extract_info(self, url, download=True, ie_key=None, extra_info={},
532 process=True):
8222d8de
JMF
533 '''
534 Returns a list with a dictionary for each video we find.
535 If 'download', also downloads the videos.
536 extra_info is a dict containing the extra values to add to each result
537 '''
fe7e0c98 538
8222d8de 539 if ie_key:
56c73665 540 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
541 else:
542 ies = self._ies
543
544 for ie in ies:
545 if not ie.suitable(url):
546 continue
547
548 if not ie.working():
6febd1c1
PH
549 self.report_warning('The program functionality for this site has been marked as broken, '
550 'and will probably not work.')
8222d8de
JMF
551
552 try:
553 ie_result = ie.extract(url)
5f6a1245 554 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
8222d8de
JMF
555 break
556 if isinstance(ie_result, list):
557 # Backwards compatibility: old IE result format
8222d8de
JMF
558 ie_result = {
559 '_type': 'compat_list',
560 'entries': ie_result,
561 }
ea38e55f 562 self.add_default_extra_info(ie_result, ie, url)
7fc3fa05
PH
563 if process:
564 return self.process_ie_result(ie_result, download, extra_info)
565 else:
566 return ie_result
5f6a1245 567 except ExtractorError as de: # An error we somewhat expected
8222d8de
JMF
568 self.report_error(compat_str(de), de.format_traceback())
569 break
d3e5bbf4
PH
570 except MaxDownloadsReached:
571 raise
8222d8de
JMF
572 except Exception as e:
573 if self.params.get('ignoreerrors', False):
574 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
575 break
576 else:
577 raise
578 else:
1a489545 579 self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 580
ea38e55f
PH
581 def add_default_extra_info(self, ie_result, ie, url):
582 self.add_extra_info(ie_result, {
583 'extractor': ie.IE_NAME,
584 'webpage_url': url,
585 'webpage_url_basename': url_basename(url),
586 'extractor_key': ie.ie_key(),
587 })
588
8222d8de
JMF
589 def process_ie_result(self, ie_result, download=True, extra_info={}):
590 """
591 Take the result of the ie(may be modified) and resolve all unresolved
592 references (URLs, playlist items).
593
594 It will also download the videos if 'download'.
595 Returns the resolved ie_result.
596 """
597
e8ee972c
PH
598 result_type = ie_result.get('_type', 'video')
599
057a5206
PH
600 if result_type in ('url', 'url_transparent'):
601 extract_flat = self.params.get('extract_flat', False)
602 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
603 extract_flat is True):
057a5206
PH
604 if self.params.get('forcejson', False):
605 self.to_stdout(json.dumps(ie_result))
e8ee972c
PH
606 return ie_result
607
8222d8de 608 if result_type == 'video':
b6c45014 609 self.add_extra_info(ie_result, extra_info)
feee2ecf 610 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
611 elif result_type == 'url':
612 # We have to add extra_info to the results because it may be
613 # contained in a playlist
614 return self.extract_info(ie_result['url'],
615 download,
616 ie_key=ie_result.get('ie_key'),
617 extra_info=extra_info)
7fc3fa05
PH
618 elif result_type == 'url_transparent':
619 # Use the information from the embedding page
620 info = self.extract_info(
621 ie_result['url'], ie_key=ie_result.get('ie_key'),
622 extra_info=extra_info, download=False, process=False)
623
624 def make_result(embedded_info):
625 new_result = ie_result.copy()
626 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
1538eff6 627 'entries', 'ie_key', 'duration',
ef4fd848
PH
628 'subtitles', 'annotations', 'format',
629 'thumbnail', 'thumbnails'):
7fc3fa05
PH
630 if f in new_result:
631 del new_result[f]
632 if f in embedded_info:
633 new_result[f] = embedded_info[f]
634 return new_result
635 new_result = make_result(info)
636
637 assert new_result.get('_type') != 'url_transparent'
638 if new_result.get('_type') == 'compat_list':
639 new_result['entries'] = [
640 make_result(e) for e in new_result['entries']]
641
642 return self.process_ie_result(
643 new_result, download=download, extra_info=extra_info)
42e12102 644 elif result_type == 'playlist' or result_type == 'multi_video':
8222d8de
JMF
645 # We process each entry in the playlist
646 playlist = ie_result.get('title', None) or ie_result.get('id', None)
6febd1c1 647 self.to_screen('[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
648
649 playlist_results = []
650
8222d8de 651 playliststart = self.params.get('playliststart', 1) - 1
a19fd00c
PH
652 playlistend = self.params.get('playlistend', None)
653 # For backwards compatibility, interpret -1 as whole list
8222d8de 654 if playlistend == -1:
a19fd00c 655 playlistend = None
8222d8de 656
b7ab0590
PH
657 if isinstance(ie_result['entries'], list):
658 n_all_entries = len(ie_result['entries'])
659 entries = ie_result['entries'][playliststart:playlistend]
660 n_entries = len(entries)
661 self.to_screen(
662 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
663 (ie_result['extractor'], playlist, n_all_entries, n_entries))
664 else:
665 assert isinstance(ie_result['entries'], PagedList)
666 entries = ie_result['entries'].getslice(
667 playliststart, playlistend)
668 n_entries = len(entries)
669 self.to_screen(
670 "[%s] playlist %s: Downloading %d videos" %
671 (ie_result['extractor'], playlist, n_entries))
8222d8de 672
fe7e0c98 673 for i, entry in enumerate(entries, 1):
6febd1c1 674 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
8222d8de 675 extra = {
c6b4132a 676 'n_entries': n_entries,
fe7e0c98 677 'playlist': playlist,
a1cf99d0
PH
678 'playlist_id': ie_result.get('id'),
679 'playlist_title': ie_result.get('title'),
fe7e0c98 680 'playlist_index': i + playliststart,
b6c45014 681 'extractor': ie_result['extractor'],
9103bbc5 682 'webpage_url': ie_result['webpage_url'],
29eb5174 683 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 684 'extractor_key': ie_result['extractor_key'],
fe7e0c98 685 }
7012b23c
PH
686
687 reason = self._match_entry(entry)
688 if reason is not None:
6febd1c1 689 self.to_screen('[download] ' + reason)
7012b23c
PH
690 continue
691
8222d8de
JMF
692 entry_result = self.process_ie_result(entry,
693 download=download,
694 extra_info=extra)
695 playlist_results.append(entry_result)
696 ie_result['entries'] = playlist_results
697 return ie_result
698 elif result_type == 'compat_list':
c9bf4114
PH
699 self.report_warning(
700 'Extractor %s returned a compat_list result. '
701 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 702
8222d8de 703 def _fixup(r):
9e1a5b84
JW
704 self.add_extra_info(
705 r,
9103bbc5
JMF
706 {
707 'extractor': ie_result['extractor'],
708 'webpage_url': ie_result['webpage_url'],
29eb5174 709 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 710 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
711 }
712 )
8222d8de
JMF
713 return r
714 ie_result['entries'] = [
b6c45014 715 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
716 for r in ie_result['entries']
717 ]
718 return ie_result
719 else:
720 raise Exception('Invalid result type: %s' % result_type)
721
a9c58ad9
JMF
722 def select_format(self, format_spec, available_formats):
723 if format_spec == 'best' or format_spec is None:
724 return available_formats[-1]
725 elif format_spec == 'worst':
726 return available_formats[0]
ba7678f9
PH
727 elif format_spec == 'bestaudio':
728 audio_formats = [
729 f for f in available_formats
730 if f.get('vcodec') == 'none']
731 if audio_formats:
732 return audio_formats[-1]
733 elif format_spec == 'worstaudio':
734 audio_formats = [
735 f for f in available_formats
736 if f.get('vcodec') == 'none']
737 if audio_formats:
738 return audio_formats[0]
bc6d5978
JMF
739 elif format_spec == 'bestvideo':
740 video_formats = [
741 f for f in available_formats
742 if f.get('acodec') == 'none']
743 if video_formats:
744 return video_formats[-1]
745 elif format_spec == 'worstvideo':
746 video_formats = [
747 f for f in available_formats
748 if f.get('acodec') == 'none']
749 if video_formats:
750 return video_formats[0]
a9c58ad9 751 else:
e2e5dae6 752 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
49e86983
JMF
753 if format_spec in extensions:
754 filter_f = lambda f: f['ext'] == format_spec
755 else:
756 filter_f = lambda f: f['format_id'] == format_spec
fe7e0c98 757 matches = list(filter(filter_f, available_formats))
a9c58ad9
JMF
758 if matches:
759 return matches[-1]
760 return None
761
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and process them.

    Fills in defaults on info_dict (playlist fields, thumbnail, display_id,
    upload_date and per-format format_id/format/ext), selects the formats
    matching the 'format' param and, when download is true, hands each of
    them to process_info().

    Returns the updated info_dict.  Returns None when the 'listformats'
    param is set (the list is printed instead) or when a merged
    'video+audio' request names an audio-only format first.
    Raises ExtractorError for missing mandatory fields, an empty format
    list, or when none of the requested formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        def thumbnail_key(t):
            # Unknown dimensions sort first.  They must be mapped to a
            # number because ordering None against int raises TypeError
            # on Python 3.
            width = t.get('width')
            height = t.get('height')
            return (
                width if width is not None else -1,
                height if height is not None else -1,
                t.get('url') or '')

        thumbnails.sort(key=thumbnail_key)
        for t in thumbnails:
            # '%d' cannot format None, so require actual values
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        # The list is sorted ascending, so the last entry is the largest
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'.
                    # Split only once so that a spec with more than one
                    # '+' is treated as unavailable instead of failing
                    # to unpack.
                    format_1, format_2 = rf.split('+', 1)
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
901
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the 'max_downloads' limit and the entry filters, prints the
    fields requested through the 'force*' params, writes the optional
    side files (description, annotations, subtitles, info JSON,
    thumbnail), downloads the media unless 'simulate' or 'skip_download'
    is set, runs the postprocessors and finally records the video in the
    download archive.

    Returns None.  Raises MaxDownloadsReached when the download limit has
    been hit and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a non-text (e.g. None) description
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing key or non-text (e.g. None) annotations
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    # Encode the name like every other file access in this
                    # method does (existence check above included)
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal for the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    # Download a single file with the downloader suited to it
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        # Each part gets its own file name: f<format_id>
                        # is inserted before the extension
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
1109
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output file name.  Returns the accumulated download return code.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    # A template without any '%' field names a single fixed file
    if several_urls and '%' not in template and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1132
def download_with_info_file(self, info_filename):
    """Process the result stored as JSON in info_filename.

    If that fails with a DownloadError and the stored info has a
    'webpage_url', the download is retried from that URL; otherwise the
    error is re-raised.  Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1146
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed under '__postprocessors' in ie_info run
    first, followed by the registered ones.  The original file is removed
    afterwards when the postprocessors asked for it and the 'keepvideo'
    param is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = list(extra_pps) if extra_pps is not None else []
    chain.extend(self._pps)

    keep_video = None
    for pp in chain:
        try:
            wish, _ = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            # This postprocessor has no opinion
            continue
        if wish:
            # A single request to keep the file wins
            keep_video = wish
        elif keep_video is None:
            # No clear decision yet, let IE decide
            keep_video = wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1173
5db07df6
PH
1174 def _make_archive_id(self, info_dict):
1175 # Future-proof against any change in case
1176 # and backwards compatibility with prior versions
d31209a1 1177 extractor = info_dict.get('extractor_key')
7012b23c
PH
1178 if extractor is None:
1179 if 'id' in info_dict:
1180 extractor = info_dict.get('ie_key') # key in a playlist
1181 if extractor is None:
5db07df6 1182 return None # Incomplete video information
6febd1c1 1183 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1184
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no archive is configured, when no archive id can be
    built for info_dict, or when the archive file does not exist yet.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno == errno.ENOENT:
            return False
        raise
1203
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1212
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human readable resolution string for a format dict.

    Yields 'audio only' for formats without video, the explicit
    'resolution' field when present, otherwise 'WxH', 'Hp' or 'Wx?'
    depending on which dimensions are known, and default when none is.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Only the width is known: it goes in the width slot and the
        # unknown height is shown as '?'
        return '%dx?' % width
    return default
1229
c57f7757
PH
1230 def _format_note(self, fdict):
1231 res = ''
1232 if fdict.get('ext') in ['f4f', 'f4m']:
1233 res += '(unsupported) '
1234 if fdict.get('format_note') is not None:
1235 res += fdict['format_note'] + ' '
1236 if fdict.get('tbr') is not None:
1237 res += '%4dk ' % fdict['tbr']
1238 if fdict.get('container') is not None:
1239 if res:
1240 res += ', '
1241 res += '%s container' % fdict['container']
1242 if (fdict.get('vcodec') is not None and
1243 fdict.get('vcodec') != 'none'):
1244 if res:
1245 res += ', '
1246 res += fdict['vcodec']
91c7271a 1247 if fdict.get('vbr') is not None:
c57f7757
PH
1248 res += '@'
1249 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1250 res += 'video@'
1251 if fdict.get('vbr') is not None:
1252 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1253 if fdict.get('fps') is not None:
1254 res += ', %sfps' % fdict['fps']
c57f7757
PH
1255 if fdict.get('acodec') is not None:
1256 if res:
1257 res += ', '
1258 if fdict['acodec'] == 'none':
1259 res += 'video only'
1260 else:
1261 res += '%-5s' % fdict['acodec']
1262 elif fdict.get('abr') is not None:
1263 if res:
1264 res += ', '
1265 res += 'audio'
1266 if fdict.get('abr') is not None:
1267 res += '@%3dk' % fdict['abr']
1268 if fdict.get('asr') is not None:
1269 res += ' (%5dHz)' % fdict['asr']
1270 if fdict.get('filesize') is not None:
1271 if res:
1272 res += ', '
1273 res += format_bytes(fdict['filesize'])
9732d77e
PH
1274 elif fdict.get('filesize_approx') is not None:
1275 if res:
1276 res += ', '
1277 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1278 return res
91c7271a 1279
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    One row per format (format code, extension, resolution, note) below a
    header row.  When there are several formats the first row is tagged
    '(worst)' and the last one '(best)'.  A missing or None 'formats'
    field means info_dict itself is the only format.
    """
    def line(format, idlen=20):
        # '%-*s' takes the width of the format code column as an argument
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats = info_dict.get('formats')
    if formats is None:
        # Same convention as process_video_result: no list, one format
        formats = [info_dict]

    # The column must fit the header as well as the longest format code;
    # seeding with the header length keeps max() valid for an empty list
    idlen = max([len('format code')] + [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
dca08720
PH
1302
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    string_types = basestring if sys.version_info < (3, 0) else compat_str

    if isinstance(req, string_types):
        # A plain URL: swap it for the escaped one when escaping changed it
        escaped = escape_url(req)
        if req != escaped:
            req = escaped
    else:
        # A request object: rebuild it around the escaped URL if needed
        original_url = req.get_full_url()
        escaped = escape_url(original_url)
        if original_url != escaped:
            req = compat_urllib_request.Request(
                escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1326
def print_debug_header(self):
    """Write the '[debug]' header lines when the 'verbose' param is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD if
    it can be determined, the Python version and platform, the versions of
    the external programs and the proxy map of the opener.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # The git lookup is best effort only (git may be missing, this
        # may not be a checkout).  Catch Exception rather than everything
        # so that KeyboardInterrupt and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() is not available on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only list the programs for which a version was found
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1380
def _setup_opener(self):
    """Create the URL opener, cookie jar and socket timeout from the params.

    Sets self._socket_timeout, self.cookiejar and self._opener.
    """
    params = self.params

    raw_timeout = params.get('socket_timeout')
    if raw_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(raw_timeout)

    cookie_path = params.get('cookiefile')
    proxy_opt = params.get('proxy')

    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            cookie_path)
        # Only load the cookie file if it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty proxy option results in an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1421
def encode(self, s):
    """Return s encoded with get_encoding(); bytes are returned as is.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
1431
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() if it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured