]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Now a new FileDownloader is created when downloading a video
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
26e63931 6import collections
c1c9a79c 7import errno
8222d8de 8import io
8694c600 9import json
8222d8de 10import os
dca08720 11import platform
8222d8de
JMF
12import re
13import shutil
dca08720 14import subprocess
8222d8de
JMF
15import socket
16import sys
17import time
18import traceback
19
1e5b9a95
PH
20if os.name == 'nt':
21 import ctypes
22
ce02ed60 23from .utils import (
dca08720 24 compat_cookiejar,
ce02ed60 25 compat_http_client,
ce02ed60
PH
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
02dbf93f 36 format_bytes,
1c088fa8 37 get_term_width,
ce02ed60 38 locked_file,
dca08720 39 make_HTTPS_handler,
ce02ed60
PH
40 MaxDownloadsReached,
41 PostProcessingError,
dca08720 42 platform_name,
ce02ed60
PH
43 preferredencoding,
44 SameFileError,
45 sanitize_filename,
46 subtitles_filename,
47 takewhile_inclusive,
48 UnavailableVideoError,
49 write_json_file,
50 write_string,
dca08720 51 YoutubeDLHandler,
ce02ed60 52)
023fa8c4 53from .extractor import get_info_extractor, gen_extractors
8222d8de 54from .FileDownloader import FileDownloader
dca08720 55from .version import __version__
8222d8de
JMF
56
57
58class YoutubeDL(object):
59 """YoutubeDL class.
60
61 YoutubeDL objects are the ones responsible of downloading the
62 actual video file and writing it to disk if the user has requested
63 it, among some other tasks. In most cases there should be one per
64 program. As, given a video URL, the downloader doesn't know how to
65 extract all the needed information, task that InfoExtractors do, it
66 has to pass the URL to one of them.
67
68 For this, YoutubeDL objects have a method that allows
69 InfoExtractors to be registered in a given order. When it is passed
70 a URL, the YoutubeDL object handles it to the first InfoExtractor it
71 finds that reports being able to handle it. The InfoExtractor extracts
72 all the information about the video or videos the URL refers to, and
73 YoutubeDL process the extracted information, possibly using a File
74 Downloader to download the video.
75
76 YoutubeDL objects accept a lot of parameters. In order not to saturate
77 the object constructor with arguments, it receives a dictionary of
78 options instead. These options are available through the params
79 attribute for the InfoExtractors to use. The YoutubeDL also
80 registers itself as the downloader in charge for the InfoExtractors
81 that are added to it, so this is a "mutual registration".
82
83 Available options:
84
85 username: Username for authentication purposes.
86 password: Password for authentication purposes.
c6c19746 87 videopassword: Password for accessing a video.
8222d8de
JMF
88 usenetrc: Use netrc for authentication instead.
89 verbose: Print additional info to stdout.
90 quiet: Do not print messages to stdout.
91 forceurl: Force printing final URL.
92 forcetitle: Force printing title.
93 forceid: Force printing ID.
94 forcethumbnail: Force printing thumbnail URL.
95 forcedescription: Force printing description.
96 forcefilename: Force printing final filename.
8694c600 97 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
98 simulate: Do not download the video files.
99 format: Video format code.
100 format_limit: Highest quality format to try.
101 outtmpl: Template for output names.
102 restrictfilenames: Do not allow "&" and spaces in file names
103 ignoreerrors: Do not stop on download errors.
104 nooverwrites: Prevent overwriting files.
105 playliststart: Playlist item to start at.
106 playlistend: Playlist item to end at.
107 matchtitle: Download only matching titles.
108 rejecttitle: Reject downloads for matching titles.
8bf9319e 109 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
110 logtostderr: Log messages to stderr instead of stdout.
111 writedescription: Write the video description to a .description file
112 writeinfojson: Write the video description to a .info.json file
1fb07d10 113 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
114 writethumbnail: Write the thumbnail image to a file
115 writesubtitles: Write the video subtitles to a file
b004821f 116 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 117 allsubtitles: Downloads all the subtitles of the video
0b7f3118 118 (requires writesubtitles or writeautomaticsub)
8222d8de 119 listsubtitles: Lists all available subtitles for the video
b98a6b2f 120 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 121 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
122 keepvideo: Keep the video file after post-processing
123 daterange: A DateRange object, download only if the upload_date is in the range.
124 skip_download: Skip the actual download of the video file
c35f9e72 125 cachedir: Location of the cache files in the filesystem.
c3c88a26 126 None to disable filesystem cache.
47192f92 127 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
128 age_limit: An integer representing the user's age in years.
129 Unsuitable videos for the given age are skipped.
529a2e2c 130 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
131 Videos already present in the file are not downloaded
132 again.
dca08720 133 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8
PH
134 nocheckcertificate:Do not verify SSL certificates
135 proxy: URL of the proxy server to use
e344693b 136 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
137 bidi_workaround: Work around buggy terminals without bidirectional text
138 support, using fribidi
fe7e0c98 139
8222d8de
JMF
140 The following parameters are not used by YoutubeDL itself, they are used by
141 the FileDownloader:
142 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
143 noresizebuffer, retries, continuedl, noprogress, consoletitle
144 """
145
146 params = None
147 _ies = []
148 _pps = []
149 _download_retcode = None
150 _num_downloads = None
151 _screen_file = None
152
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None (the default) is treated as an empty dictionary.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._fd_progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Resolve the options first: everything below, including
    # report_warning(), reads them through self.params, and the
    # params=None default must not fail on params.get().
    self.params = {} if params is None else params
    if self.params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout
    self._err_file = sys.stderr

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            # fribidi writes to the pty slave; we read its output back
            # from the master end in _bidi_workaround()
            self._fribidi = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args,
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            self._fribidi_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround
            if ose.errno == errno.ENOENT:
                self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
200
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and make this object its downloader."""
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
206
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the registered IE instance named ie_key.

    If no instance is registered under that key yet, a new one is
    created and added through add_info_extractor().
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
218
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
225
8222d8de
JMF
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
230
8ab470f1
JMF
def add_downloader_progress_hook(self, ph):
    """Remember ph in the list of file downloader progress hooks."""
    hooks = self._fd_progress_hooks
    hooks.append(ph)
234
1c088fa8
PH
def _bidi_workaround(self, message):
    """Pass message through the fribidi subprocess and return its output.

    message is returned unchanged when no fribidi channel was set up.
    """
    if not hasattr(self, '_fribidi_channel'):
        return message

    assert type(message) == type(u'')
    # One line is read back for every line sent (the message plus the
    # newline appended below)
    expected_lines = message.count(u'\n') + 1
    payload = (message + u'\n').encode('utf-8')
    self._fribidi.stdin.write(payload)
    self._fribidi.stdin.flush()
    decoded = []
    for _ in range(expected_lines):
        decoded.append(self._fribidi_channel.readline().decode('utf-8'))
    res = u''.join(decoded)
    # Drop the final newline again
    return res[:-len(u'\n')]
246
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout(), honouring the 'quiet' option."""
    return self.to_stdout(message, skip_eol, check_quiet=True)
250
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the 'logger' option if set, else write it to the screen file.

    With check_quiet set, nothing is written when the 'quiet' option is on.
    skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    text = self._bidi_workaround(message)
    line_end = (u'\n', u'')[skip_eol]
    write_string(text + line_end, self._screen_file)
8222d8de
JMF
261
def to_stderr(self, message):
    """Send message to the 'logger' option as an error if set, else write it to the error file."""
    assert type(message) == type(u'')
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    text = self._bidi_workaround(message)
    write_string(text + u'\n', self._err_file)
8222d8de 271
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            write_string(u'\033]0;%s\007' % message, self._screen_file)
1e5b9a95 281
bdde425c
PH
def save_console_title(self):
    """Push the current console title onto the terminal's title stack
    when 'consoletitle' is on and TERM is set."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
bdde425c
PH
288
def restore_console_title(self):
    """Pop the console title from the terminal's title stack
    when 'consoletitle' is on and TERM is set."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
bdde425c
PH
295
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
299
def __exit__(self, *args):
    """Leave the context: restore the console title and, when a
    'cookiefile' option is set, save the cookie jar."""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
bdde425c 305
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr; in verbose mode a
    traceback is printed as well. Unless the 'ignoreerrors' option is
    set, a DownloadError is then raised; otherwise the return code is
    set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc = sys.exc_info()[:2]
            if exc_type:  # .trouble has been called from an except block
                tb = u''
                # Include the traceback carried by the exception itself first
                if hasattr(exc, 'exc_info') and exc.exc_info[0]:
                    tb += u''.join(traceback.format_exception(*exc.exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                stack_lines = traceback.format_list(traceback.extract_stack())
                tb = u''.join(stack_lines)
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_type, exc = sys.exc_info()[:2]
    if exc_type and hasattr(exc, 'exc_info') and exc.exc_info[0]:
        exc_info = exc.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
335
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is colored when the error file is a tty and the OS is not 'nt'.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    if use_color:
        header = u'\033[0;33mWARNING:\033[0m'
    else:
        header = u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
347
def report_error(self, message, tb=None):
    '''
    Forward to trouble() with message prefixed by 'ERROR:'.
    The prefix is colored red when the error file is a tty and the OS is not 'nt'.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    if use_color:
        header = u'\033[0;31mERROR:\033[0m'
    else:
        header = u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
359
8222d8de
JMF
def report_writedescription(self, descfn):
    """ Report that the description file descfn is being written """
    prefix = u'[info] Writing video description to: '
    self.to_screen(prefix + descfn)
363
def report_writesubtitles(self, sub_filename):
    """ Report that the subtitles file sub_filename is being written """
    prefix = u'[info] Writing video subtitles to: '
    self.to_screen(prefix + sub_filename)
367
def report_writeinfojson(self, infofn):
    """ Report that the JSON metadata file infofn is being written """
    prefix = u'[info] Video description metadata as JSON to: '
    self.to_screen(prefix + infofn)
371
1fb07d10
JG
def report_writeannotations(self, annofn):
    """ Report that the annotations file annofn is being written. """
    prefix = u'[info] Writing video annotations to: '
    self.to_screen(prefix + annofn)
375
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    Falls back to a message without the file name if printing it
    raises UnicodeEncodeError.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
382
def increment_downloads(self):
    """Advance the download counter by one."""
    self._num_downloads = self._num_downloads + 1
386
def prepare_filename(self, info_dict):
    """Build the output filename by filling the 'outtmpl' template from info_dict.

    Returns None, after reporting the error, if a ValueError is raised
    while building the name.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        number_width = self.params.get('autonumber_size')
        if number_width is None:
            number_width = 5
        number_format = u'%0' + str(number_width) + u'd'
        fields['autonumber'] = number_format % self._num_downloads
        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')
        sanitized = {}
        for key, value in fields.items():
            # None values are dropped so they render as 'NA' below
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))
        # Any field the template asks for but we do not have renders as 'NA'
        fields = collections.defaultdict(lambda: u'NA', sanitized)

        template = os.path.expanduser(self.params['outtmpl'])
        return template % fields
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
416
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded, otherwise a
    message explaining why it is skipped.

    Checks, in order: the 'matchtitle' and 'rejecttitle' patterns (only
    when a title is present), the 'daterange' option against
    'upload_date', the 'age_limit' option, and the download archive.
    """
    # The title can be missing when we're just evaluating the playlist,
    # so messages that name the video fall back to its id.
    video_title = info_dict.get('title', info_dict.get('id', u'video'))

    if 'title' in info_dict:
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # video_title, not title: title is unbound for title-less entries
            return u'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return u'%s has already been recorded in archive' % video_title
    return None
fe7e0c98 444
b6c45014
JMF
445 @staticmethod
446 def add_extra_info(info_dict, extra_info):
447 '''Set the keys from extra_info in info dict if they are missing'''
448 for key, value in extra_info.items():
449 info_dict.setdefault(key, value)
450
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    '''
    Run the first suitable InfoExtractor on url.

    With ie_key only that extractor is tried, otherwise the registered
    ones in order. If 'process', the extracted result is handed to
    process_ie_result (which downloads if 'download') and its return
    value is returned; otherwise the raw extractor result is returned.
    extra_info is a dict containing the extra values to add to each result.
    Nothing is returned when extraction finished early or an error was
    reported.
    '''
    if ie_key:
        candidates = [self.get_info_extractor(ie_key)]
    else:
        candidates = self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {'_type': 'compat_list', 'entries': ie_result}
            origin = {
                'extractor': ie.IE_NAME,
                'webpage_url': url,
                'extractor_key': ie.ie_key(),
            }
            self.add_extra_info(ie_result, origin)
            if not process:
                return ie_result
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 503
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result. For a 'url_transparent' result whose
    embedded URL could not be extracted, the empty result of that
    extraction (normally None) is returned instead.
    Raises Exception for an unknown '_type'.
    """

    # If not given we suppose it's a video, support the default old system
    result_type = ie_result.get('_type', 'video')

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)

    if result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)

    if result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns nothing when the extraction failed and the
        # error was only reported; there is nothing to merge in that case.
        if not info:
            return info

        # Fields that always come from the embedded result, never from
        # the embedding page
        embedded_fields = (
            '_type', 'url', 'ext', 'player_url', 'formats',
            'entries', 'urlhandle', 'ie_key', 'duration',
            'subtitles', 'annotations', 'format',
            'thumbnail', 'thumbnails')

        def make_result(embedded_info):
            new_result = ie_result.copy()
            for f in embedded_fields:
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)

    if result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # playliststart is 1-based in the options
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result

    if result_type == 'compat_list':
        def _fixup(r):
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result

    raise Exception('Invalid result type: %s' % result_type)
608
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick the format matching format_spec from available_formats.

    format_spec may be 'best' (or None) for the last format, 'worst' for
    the first, one of the extensions mp4/flv/webm/3gp, or a format_id.
    When several formats match, the last one is returned.
    Returns None if nothing matches or no formats are available.
    """
    # Nothing to choose from: report "no match" instead of IndexError
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f.get(field) == format_spec]
    if matches:
        return matches[-1]
    return None
624
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Select the format(s) to download for a single video result.

    Fills in missing 'format_id', 'format' and 'ext' fields of every
    format, applies the 'format_limit', 'prefer_free_formats' and
    'format' options and, if 'download', passes each selected format to
    process_info. Returns info_dict updated with the best selected
    format, or None when only the format list was requested.
    Raises ExtractorError if none of the requested formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort lowest; None cannot be ordered
            # against int on Python 3.
            height = f.get('height') if f.get('height') is not None else -1
            width = f.get('width') if f.get('width') is not None else -1
            # We only compare the extension if they have the same height and width
            return (height, width, ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requestd in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
709
8222d8de
JMF
710 def process_info(self, info_dict):
711 """Process a single resolved IE result."""
712
713 assert info_dict.get('_type', 'video') == 'video'
714 #We increment the download the download count here to match the previous behaviour.
715 self.increment_downloads()
716
717 info_dict['fulltitle'] = info_dict['title']
718 if len(info_dict['title']) > 200:
719 info_dict['title'] = info_dict['title'][:197] + u'...'
720
721 # Keep for backwards compatibility
722 info_dict['stitle'] = info_dict['title']
723
724 if not 'format' in info_dict:
725 info_dict['format'] = info_dict['ext']
726
727 reason = self._match_entry(info_dict)
728 if reason is not None:
729 self.to_screen(u'[download] ' + reason)
730 return
731
732 max_downloads = self.params.get('max_downloads')
733 if max_downloads is not None:
734 if self._num_downloads > int(max_downloads):
735 raise MaxDownloadsReached()
736
737 filename = self.prepare_filename(info_dict)
738
739 # Forced printings
740 if self.params.get('forcetitle', False):
0783b09b 741 self.to_stdout(info_dict['fulltitle'])
8222d8de 742 if self.params.get('forceid', False):
0783b09b 743 self.to_stdout(info_dict['id'])
8222d8de 744 if self.params.get('forceurl', False):
edde6c56 745 # For RTMP URLs, also include the playpath
0783b09b 746 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
216d71d0 747 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 748 self.to_stdout(info_dict['thumbnail'])
216d71d0 749 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 750 self.to_stdout(info_dict['description'])
8222d8de 751 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 752 self.to_stdout(filename)
8222d8de 753 if self.params.get('forceformat', False):
0783b09b 754 self.to_stdout(info_dict['format'])
9d153818 755 if self.params.get('forcejson', False):
a0d96c98 756 info_dict['_filename'] = filename
0783b09b 757 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
758
759 # Do nothing else if in simulate mode
760 if self.params.get('simulate', False):
761 return
762
763 if filename is None:
764 return
765
766 try:
767 dn = os.path.dirname(encodeFilename(filename))
768 if dn != '' and not os.path.exists(dn):
769 os.makedirs(dn)
770 except (OSError, IOError) as err:
771 self.report_error(u'unable to create directory ' + compat_str(err))
772 return
773
774 if self.params.get('writedescription', False):
775 try:
776 descfn = filename + u'.description'
777 self.report_writedescription(descfn)
778 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
779 descfile.write(info_dict['description'])
b3f0e530 780 except (KeyError, TypeError):
535f59bb 781 self.report_warning(u'There\'s no description to write.')
8222d8de
JMF
782 except (OSError, IOError):
783 self.report_error(u'Cannot write description file ' + descfn)
784 return
785
1fb07d10
JG
786 if self.params.get('writeannotations', False):
787 try:
fe7e0c98
JMF
788 annofn = filename + u'.annotations.xml'
789 self.report_writeannotations(annofn)
790 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
791 annofile.write(info_dict['annotations'])
1fb07d10
JG
792 except (KeyError, TypeError):
793 self.report_warning(u'There are no annotations to write.')
794 except (OSError, IOError):
fe7e0c98
JMF
795 self.report_error(u'Cannot write annotations file: ' + annofn)
796 return
1fb07d10 797
c4a91be7 798 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 799 self.params.get('writeautomaticsub')])
c4a91be7 800
fe7e0c98 801 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
8222d8de
JMF
802 # subtitles download errors are already managed as troubles in relevant IE
803 # that way it will silently go on when used with unsupporting IE
8222d8de 804 subtitles = info_dict['subtitles']
ca715127 805 sub_format = self.params.get('subtitlesformat', 'srt')
5d51a883
JMF
806 for sub_lang in subtitles.keys():
807 sub = subtitles[sub_lang]
6804038d
JMF
808 if sub is None:
809 continue
8222d8de 810 try:
d4051a8e 811 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
8222d8de
JMF
812 self.report_writesubtitles(sub_filename)
813 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
5d51a883 814 subfile.write(sub)
8222d8de
JMF
815 except (OSError, IOError):
816 self.report_error(u'Cannot write subtitles file ' + descfn)
817 return
818
8222d8de 819 if self.params.get('writeinfojson', False):
9771cceb 820 infofn = os.path.splitext(filename)[0] + u'.info.json'
8222d8de
JMF
821 self.report_writeinfojson(infofn)
822 try:
fe7e0c98 823 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
8222d8de
JMF
824 write_json_file(json_info_dict, encodeFilename(infofn))
825 except (OSError, IOError):
826 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
827 return
828
829 if self.params.get('writethumbnail', False):
d8269e1d 830 if info_dict.get('thumbnail') is not None:
cbdbb766 831 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
357ddadb 832 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
8222d8de
JMF
833 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
834 (info_dict['extractor'], info_dict['id']))
0a60edcf
JMF
835 try:
836 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
837 with open(thumb_filename, 'wb') as thumbf:
838 shutil.copyfileobj(uf, thumbf)
839 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
840 (info_dict['extractor'], info_dict['id'], thumb_filename))
841 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
842 self.report_warning(u'Unable to download thumbnail "%s": %s' %
843 (info_dict['thumbnail'], compat_str(err)))
8222d8de
JMF
844
845 if not self.params.get('skip_download', False):
846 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
847 success = True
848 else:
849 try:
8ab470f1
JMF
850 fd = FileDownloader(self, self.params)
851 for ph in self._fd_progress_hooks:
852 fd.add_progress_hook(ph)
853 success = fd._do_download(filename, info_dict)
8222d8de
JMF
854 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
855 self.report_error(u'unable to download video data: %s' % str(err))
856 return
c40c6aaa
JMF
857 except (OSError, IOError) as err:
858 raise UnavailableVideoError(err)
8222d8de
JMF
859 except (ContentTooShortError, ) as err:
860 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
861 return
862
863 if success:
864 try:
865 self.post_process(filename, info_dict)
866 except (PostProcessingError) as err:
867 self.report_error(u'postprocessing: %s' % str(err))
868 return
869
c1c9a79c
PH
870 self.record_download_archive(info_dict)
871
8222d8de
JMF
def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    When more than one URL is given, the 'outtmpl' template contains no
    '%' field and 'max_downloads' is not 1, SameFileError is raised before
    anything is fetched. UnavailableVideoError is reported and the loop
    continues with the next URL; MaxDownloadsReached is announced on
    screen and re-raised.
    """
    if len(url_list) > 1:
        # The template is only inspected when several URLs were passed.
        fixed_name = '%' not in self.params['outtmpl']
        if fixed_name and self.params.get('max_downloads') != 1:
            raise SameFileError(self.params['outtmpl'])

    for video_url in url_list:
        try:
            # Extracting the info also downloads the videos.
            self.extract_info(video_url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
890
def download_with_info_file(self, info_filename):
    """Process the info stored in a JSON file, downloading what it describes.

    The file is read as UTF-8 JSON and passed to process_ie_result with
    download=True. If that raises DownloadError and the loaded info has a
    'webpage_url', a warning is issued and that URL is downloaded instead;
    without a 'webpage_url' the error is re-raised.
    Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        saved_info = json.load(info_file)

    try:
        self.process_ie_result(saved_info, download=True)
    except DownloadError:
        fallback_url = saved_info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning(u'The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 904
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    A copy of ie_info with an added 'filepath' entry (set to filename) is
    handed to the first postprocessor in self._pps. Each postprocessor's
    run() returns a (keep_video_wish, info) pair; the returned info is
    passed on to the next postprocessor so that changes made by one step
    are visible to the following ones. A postprocessor that raises
    PostProcessingError is reported and skipped.

    After the chain has run, the file named by filename is removed when
    the combined wish is exactly False and the 'keepvideo' option is not
    set. ie_info itself is never modified by this method.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue

        # Thread the updated information through the chain; tolerate a
        # postprocessor that hands back no info by keeping the current one.
        if new_info is not None:
            info = new_info

        if keep_video_wish:
            # A single request to keep the file wins over everything else.
            keep_video = keep_video_wish
        elif keep_video_wish is not None and keep_video is None:
            # No clear decision yet: record the (negative) wish.
            keep_video = keep_video_wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c 927
5db07df6
PH
928 def _make_archive_id(self, info_dict):
929 # Future-proof against any change in case
930 # and backwards compatibility with prior versions
d31209a1 931 extractor = info_dict.get('extractor_key')
7012b23c
PH
932 if extractor is None:
933 if 'id' in info_dict:
934 extractor = info_dict.get('ie_key') # key in a playlist
935 if extractor is None:
5db07df6
PH
936 return None # Incomplete video information
937 return extractor.lower() + u' ' + info_dict['id']
938
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' file is configured, when no
    archive id can be built for info_dict, or when the archive file does
    not exist yet. Any other IOError while reading is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded so far.
        if ioe.errno != errno.ENOENT:
            raise
    return False
957
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' option is not set. The id
    is written on its own line, in UTF-8, while the file is held through
    locked_file.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + u'\n')
dd82ffea 966
8c51aa65 967 @staticmethod
8abeeb94 968 def format_resolution(format, default='unknown'):
fb04e403
PH
969 if format.get('vcodec') == 'none':
970 return 'audio only'
57dd9a8f
PH
971 if format.get('_resolution') is not None:
972 return format['_resolution']
8c51aa65
JMF
973 if format.get('height') is not None:
974 if format.get('width') is not None:
975 res = u'%sx%s' % (format['width'], format['height'])
976 else:
977 res = u'%sp' % format['height']
978 else:
8abeeb94 979 res = default
8c51aa65
JMF
980 return res
981
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    The formats are taken from info_dict['formats']; when that key is
    missing, info_dict itself is treated as the only format. Each row
    shows the format id, the extension, the resolution (as given by
    self.format_resolution) and a free-form note. When there is more
    than one format, the first row is tagged '(worst)' and the last one
    '(best)'. An empty format list prints the header only.
    """
    def format_note(fdict):
        # Build the "note" column: optional format note, video codec and
        # bitrate, audio codec and bitrate, and the file size.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += u'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # The id column is one character wider than the longest id.
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # Seeding the list with the header label keeps max() well defined
    # even when there are no formats at all.
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
dca08720
PH
1031
def urlopen(self, req):
    """ Open req through this instance's opener and return the result """
    opener = self._opener
    return opener.open(req)
1035
def print_debug_header(self):
    """Write debugging information when the 'verbose' option is set.

    Emits, through write_string: the youtube-dl version, the short git
    HEAD of the directory containing this file (only if `git rev-parse`
    succeeds and prints a hexadecimal id), the Python version together
    with the platform name, and the proxies collected from every handler
    of self._opener that has a `proxies` attribute.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        # Require the whole output to be hexadecimal, not just its start.
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git lookup is best effort (git may be missing, or this may
        # not be a checkout); KeyboardInterrupt/SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear is not available on every Python version.
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1062
def _setup_opener(self):
    """Build the URL opener for this instance and install it globally.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' options, stores the cookie jar in self.cookiejar
    and the opener in self._opener, installs the opener as the default
    one of compat_urllib_request and sets the default socket timeout.
    """
    # Fall back to 600 when no 'socket_timeout' was given.
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the cookie file when it is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An explicitly empty 'proxy' option yields an empty proxy map.
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)