]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Merge remote-tracking branch 'jaimeMF/yt-toplists'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
c1c9a79c 6import errno
8222d8de 7import io
8694c600 8import json
8222d8de 9import os
dca08720 10import platform
8222d8de
JMF
11import re
12import shutil
dca08720 13import subprocess
8222d8de
JMF
14import socket
15import sys
16import time
17import traceback
18
1e5b9a95
PH
19if os.name == 'nt':
20 import ctypes
21
ce02ed60 22from .utils import (
dca08720 23 compat_cookiejar,
ce02ed60
PH
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
02dbf93f 36 format_bytes,
ce02ed60 37 locked_file,
dca08720 38 make_HTTPS_handler,
ce02ed60
PH
39 MaxDownloadsReached,
40 PostProcessingError,
dca08720 41 platform_name,
ce02ed60
PH
42 preferredencoding,
43 SameFileError,
44 sanitize_filename,
45 subtitles_filename,
46 takewhile_inclusive,
47 UnavailableVideoError,
48 write_json_file,
49 write_string,
dca08720 50 YoutubeDLHandler,
ce02ed60 51)
023fa8c4 52from .extractor import get_info_extractor, gen_extractors
8222d8de 53from .FileDownloader import FileDownloader
dca08720 54from .version import __version__
8222d8de
JMF
55
56
57class YoutubeDL(object):
58 """YoutubeDL class.
59
60 YoutubeDL objects are the ones responsible of downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
66
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object handles it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL process the extracted information, possibly using a File
73 Downloader to download the video.
74
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
81
82 Available options:
83
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
c6c19746 86 videopassword: Password for accessing a video.
8222d8de
JMF
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
8694c600 96 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
8bf9319e 108 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 112 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
b004821f 115 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 116 allsubtitles: Downloads all the subtitles of the video
0b7f3118 117 (requires writesubtitles or writeautomaticsub)
8222d8de 118 listsubtitles: Lists all available subtitles for the video
b98a6b2f 119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 120 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
c35f9e72 124 cachedir: Location of the cache files in the filesystem.
c3c88a26 125 None to disable filesystem cache.
47192f92 126 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
529a2e2c 129 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
130 Videos already present in the file are not downloaded
131 again.
dca08720 132 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8
PH
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
e344693b 135 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
136 bidi_workaround: Work around buggy terminals without bidirectional text
137 support, using fribidi
fe7e0c98 138
8222d8de
JMF
139 The following parameters are not used by YoutubeDL itself, they are used by
140 the FileDownloader:
141 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
142 noresizebuffer, retries, continuedl, noprogress, consoletitle
143 """
144
145 params = None
146 _ies = []
147 _pps = []
148 _download_retcode = None
149 _num_downloads = None
150 _screen_file = None
151
def __init__(self, params=None):
    """Create a FileDownloader object with the given options.

    params is the options dictionary described in the class docstring.
    None is accepted and treated as an empty dictionary.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Resolve the options before anything reads them, so that
    # params=None works for every lookup below.
    self.params = {} if params is None else params
    self._screen_file = sys.stderr if self.params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr

    # Pipe messages through fribidi
    if self.params.get('bidi_workaround', False):
        # fribidi does not support ungetting, so force newlines
        self.params['progress_with_newline'] = True

        class FribidiOut(object):
            """File-like object that forwards everything written to a
            fribidi subprocess whose stdout is the wrapped file."""

            def __init__(self, outfile, errfile):
                self.outfile = outfile
                self.process = subprocess.Popen(
                    ['fribidi'],
                    stdin=subprocess.PIPE,
                    stdout=outfile,
                    stderr=errfile)

            def write(self, s):
                res = self.process.stdin.write(s)
                # Flush after every write so output is not held back in
                # the pipe buffer.
                self.flush()
                return res

            def flush(self):
                return self.process.stdin.flush()

            def isatty(self):
                return self.outfile.isatty()

        for fid in ['_screen_file', '_err_file']:
            try:
                vout = FribidiOut(getattr(self, fid), self._err_file)
                setattr(self, fid, vout)
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                    break
                else:
                    raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
216
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and make this object its downloader."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
222
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. When none is
    registered yet, instantiate the class returned by the module-level
    get_info_extractor(ie_key), register it and return it.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
234
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in order.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
241
8222d8de
JMF
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and make this object its
    downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
246
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout with the quiet check enabled."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
250
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file.

    When a 'logger' option is set the message goes to logger.debug()
    instead. With check_quiet, nothing is written in quiet mode.
    A newline is appended unless skip_eol is set.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    eol = (u'\n', u'')[skip_eol]
    write_string(message + eol, self._screen_file)
8222d8de
JMF
259
def to_stderr(self, message):
    """Print message, followed by a newline, to the error file; when a
    'logger' option is set, send it to logger.error() instead."""
    assert type(message) == type(u'')
    if not self.params.get('logger'):
        write_string(message + u'\n', self._err_file)
        return
    self.params['logger'].error(message)
8222d8de 268
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    On Windows with a console window this uses SetConsoleTitleW;
    otherwise, when TERM is set, an escape sequence is written to the
    screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
1e5b9a95 278
bdde425c
PH
def save_console_title(self):
    """When 'consoletitle' is enabled and TERM is set, write the escape
    sequence that saves the title on the stack."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
bdde425c
PH
285
def restore_console_title(self):
    """When 'consoletitle' is enabled and TERM is set, write the escape
    sequence that restores the title from the stack."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
bdde425c
PH
292
293 def __enter__(self):
294 self.save_console_title()
295 return self
296
297 def __exit__(self, *args):
298 self.restore_console_title()
dca08720
PH
299
300 if self.params.get('cookiefile') is not None:
301 self.cookiejar.save()
bdde425c 302
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr. In verbose mode a
    traceback is printed as well: tb when given, otherwise one built
    from the exception being handled or, failing that, from the
    current stack.

    Unless the 'ignoreerrors' option is set, a DownloadError carrying
    the message and the exception info is raised. In every case where
    no exception is raised, the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = [u'']
                current = sys.exc_info()[1]
                # Some exceptions carry the exc_info of their cause
                if hasattr(current, 'exc_info') and current.exc_info[0]:
                    parts.append(u''.join(traceback.format_exception(*current.exc_info)))
                parts.append(compat_str(traceback.format_exc()))
                tb = u''.join(parts)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()[1]
    if sys.exc_info()[0] and hasattr(current, 'exc_info') and current.exc_info[0]:
        exc_info = current.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
332
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'. The prefix is
    colored when the error file is a tty and the OS is not Windows.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
344
def report_error(self, message, tb=None):
    '''
    Pass message to trouble() prefixed with 'ERROR:'. The prefix is
    colored in red when the error file is a tty and the OS is not
    Windows.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
356
8222d8de
JMF
def report_writedescription(self, descfn):
    """ Announce on screen that the description is being written to descfn """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
360
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that subtitles are being written to sub_filename """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
364
def report_writeinfojson(self, infofn):
    """ Announce on screen the JSON metadata file name infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
368
1fb07d10
JG
def report_writeannotations(self, annofn):
    """ Announce on screen that annotations are being written to annofn """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
372
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing it
    raises UnicodeEncodeError.
    """
    show = self.to_screen
    try:
        show(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        show(u'[download] The file has already been downloaded')
379
def increment_downloads(self):
    """Advance by one the counter that numbers the downloaded files."""
    self._num_downloads = self._num_downloads + 1
383
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option with a sanitized copy of info_dict
    extended with 'epoch' and 'autonumber'. Reports an error and
    returns None when the template cannot be expanded.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def _clean(key, value):
            # Missing values are rendered as the literal text NA
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(text, restricted=restricted, is_id=(key == u'id'))

        fields = dict((key, _clean(key, value)) for key, value in fields.items())

        template = os.path.expanduser(self.params['outtmpl'])
        return template % fields
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
414
415 def _match_entry(self, info_dict):
416 """ Returns None iff the file should be downloaded """
417
7012b23c
PH
418 if 'title' in info_dict:
419 # This can happen when we're just evaluating the playlist
420 title = info_dict['title']
421 matchtitle = self.params.get('matchtitle', False)
422 if matchtitle:
423 if not re.search(matchtitle, title, re.IGNORECASE):
424 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
425 rejecttitle = self.params.get('rejecttitle', False)
426 if rejecttitle:
427 if re.search(rejecttitle, title, re.IGNORECASE):
428 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
429 date = info_dict.get('upload_date', None)
430 if date is not None:
431 dateRange = self.params.get('daterange', DateRange())
432 if date not in dateRange:
433 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
8dbe9899
PH
434 age_limit = self.params.get('age_limit')
435 if age_limit is not None:
cfadd183 436 if age_limit < info_dict.get('age_limit', 0):
8dbe9899 437 return u'Skipping "' + title + '" because it is age restricted'
c1c9a79c 438 if self.in_download_archive(info_dict):
7012b23c
PH
439 return (u'%s has already been recorded in archive'
440 % info_dict.get('title', info_dict.get('id', u'video')))
8222d8de 441 return None
fe7e0c98 442
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info it does not have yet'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
448
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    '''
    Run the first suitable InfoExtractor on url.

    With ie_key only that extractor is tried, otherwise the registered
    ones in order. The extractor result gets 'extractor', 'webpage_url'
    and 'extractor_key' filled in when missing. If 'process', the
    result is passed on to process_ie_result together with 'download'
    and extra_info (a dict with the extra values to add to each
    result) and that return value is returned; otherwise the extractor
    result itself is returned.

    Returns None when no extractor is suitable, when the extractor
    returned None, or when an error was reported instead of raised.
    '''

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for extractor in candidates:
        if not extractor.suitable(url):
            continue

        if not extractor.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            result = extractor.extract(url)
            if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(result, list):
                # Backwards compatibility: old IE result format
                result = {
                    '_type': 'compat_list',
                    'entries': result,
                }
            self.add_extra_info(result, {
                'extractor': extractor.IE_NAME,
                'webpage_url': url,
                'extractor_key': extractor.ie_key(),
            })
            if not process:
                return result
            return self.process_ie_result(result, download, extra_info)
        except ExtractorError as err:  # An error we somewhat expected
            self.report_error(compat_str(err), err.format_traceback())
            break
        except Exception as err:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(err), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 501
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    Dispatches on ie_result['_type'] ('video' when absent): 'video',
    'url', 'url_transparent', 'playlist' or 'compat_list'. Any other
    value raises Exception.

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """

    kind = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system

    if kind == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)

    if kind == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)

    if kind == 'url_transparent':
        # Use the information from the embedding page
        embedded = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # These fields always come from the embedded result; every
        # other field is kept from the embedding ie_result.
        overridden = ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'urlhandle', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails')

        def _merge(source):
            merged = ie_result.copy()
            for field in overridden:
                if field in merged:
                    del merged[field]
                if field in source:
                    merged[field] = source[field]
            return merged

        combined = _merge(embedded)

        assert combined.get('_type') != 'url_transparent'
        if combined.get('_type') == 'compat_list':
            combined['entries'] = [_merge(item) for item in combined['entries']]

        return self.process_ie_result(
            combined, download=download, extra_info=extra_info)

    if kind == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        all_entries = ie_result['entries']
        total = len(all_entries)
        # 'playliststart' is 1-based; convert it to a slice offset
        first = self.params.get('playliststart', 1) - 1
        last = self.params.get('playlistend', -1)

        if last == -1:
            selected = all_entries[first:]
        else:
            selected = all_entries[first:last]

        count = len(selected)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, total, count))

        processed = []
        for position, entry in enumerate(selected, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (position, count))
            extra = {
                'playlist': playlist,
                'playlist_index': position + first,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            processed.append(self.process_ie_result(entry,
                                                    download=download,
                                                    extra_info=extra))
        ie_result['entries'] = processed
        return ie_result

    if kind == 'compat_list':
        def _with_origin(item):
            self.add_extra_info(item, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            })
            return item

        ie_result['entries'] = [
            self.process_ie_result(_with_origin(item), download, extra_info)
            for item in ie_result['entries']
        ]
        return ie_result

    raise Exception('Invalid result type: %s' % kind)
606
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick one entry of available_formats according to format_spec.

    format_spec may be 'best' or None (last entry), 'worst' (first
    entry), one of the extensions mp4/flv/webm/3gp (last entry with
    that 'ext') or anything else, which is compared against
    'format_id' (last matching entry).

    Returns the chosen format dict, or None when nothing matches or
    available_formats is empty.
    """
    # Without this guard 'best'/'worst' would raise IndexError on an
    # empty list instead of reporting "no match" like other specs do.
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
622
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Select the format(s) to use for a single video result.

    Fills in missing 'format_id', 'format' and 'ext' on every format,
    applies the 'format_limit', 'prefer_free_formats' and 'format'
    options and, if 'download', hands one copy of info_dict per
    selected format to process_info.

    Returns info_dict updated with the last selected format, or None
    when the 'listformats' option is set (the formats are listed
    instead). Raises ExtractorError when no requested format is
    available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    formats = info_dict['formats'] if info_dict.get('formats') is not None else [info_dict]

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(index)
        if fmt.get('format') is None:
            fmt_id = fmt['format_id']
            resolution = self.format_resolution(fmt)
            if fmt.get('format_note') is not None:
                note = u' ({0})'.format(fmt['format_note'])
            else:
                note = ''
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt_id, res=resolution, note=note)
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        free_order = [u'flv', u'mp4', u'webm']

        def _free_formats_key(f):
            # We only compare the extension if they have the same height and width
            ext_ord = free_order.index(f['ext']) if f['ext'] in free_order else -1
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'

    chosen = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        chosen = formats
    else:
        # We can accept formats requestd in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for spec in req_format.split('/'):
            candidate = self.select_format(spec, formats)
            if candidate is not None:
                chosen = [candidate]
                break
    if not chosen:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(chosen) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(chosen)))
        for fmt in chosen:
            per_format = dict(info_dict)
            per_format.update(fmt)
            self.process_info(per_format)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(chosen[-1])
    return info_dict
707
8222d8de
JMF
708 def process_info(self, info_dict):
709 """Process a single resolved IE result."""
710
711 assert info_dict.get('_type', 'video') == 'video'
712 #We increment the download the download count here to match the previous behaviour.
713 self.increment_downloads()
714
715 info_dict['fulltitle'] = info_dict['title']
716 if len(info_dict['title']) > 200:
717 info_dict['title'] = info_dict['title'][:197] + u'...'
718
719 # Keep for backwards compatibility
720 info_dict['stitle'] = info_dict['title']
721
722 if not 'format' in info_dict:
723 info_dict['format'] = info_dict['ext']
724
725 reason = self._match_entry(info_dict)
726 if reason is not None:
727 self.to_screen(u'[download] ' + reason)
728 return
729
730 max_downloads = self.params.get('max_downloads')
731 if max_downloads is not None:
732 if self._num_downloads > int(max_downloads):
733 raise MaxDownloadsReached()
734
735 filename = self.prepare_filename(info_dict)
736
737 # Forced printings
738 if self.params.get('forcetitle', False):
0783b09b 739 self.to_stdout(info_dict['fulltitle'])
8222d8de 740 if self.params.get('forceid', False):
0783b09b 741 self.to_stdout(info_dict['id'])
8222d8de 742 if self.params.get('forceurl', False):
edde6c56 743 # For RTMP URLs, also include the playpath
0783b09b 744 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
216d71d0 745 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 746 self.to_stdout(info_dict['thumbnail'])
216d71d0 747 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 748 self.to_stdout(info_dict['description'])
8222d8de 749 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 750 self.to_stdout(filename)
8222d8de 751 if self.params.get('forceformat', False):
0783b09b 752 self.to_stdout(info_dict['format'])
9d153818 753 if self.params.get('forcejson', False):
a0d96c98 754 info_dict['_filename'] = filename
0783b09b 755 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
756
757 # Do nothing else if in simulate mode
758 if self.params.get('simulate', False):
759 return
760
761 if filename is None:
762 return
763
764 try:
765 dn = os.path.dirname(encodeFilename(filename))
766 if dn != '' and not os.path.exists(dn):
767 os.makedirs(dn)
768 except (OSError, IOError) as err:
769 self.report_error(u'unable to create directory ' + compat_str(err))
770 return
771
772 if self.params.get('writedescription', False):
773 try:
774 descfn = filename + u'.description'
775 self.report_writedescription(descfn)
776 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
777 descfile.write(info_dict['description'])
b3f0e530 778 except (KeyError, TypeError):
535f59bb 779 self.report_warning(u'There\'s no description to write.')
8222d8de
JMF
780 except (OSError, IOError):
781 self.report_error(u'Cannot write description file ' + descfn)
782 return
783
1fb07d10
JG
784 if self.params.get('writeannotations', False):
785 try:
fe7e0c98
JMF
786 annofn = filename + u'.annotations.xml'
787 self.report_writeannotations(annofn)
788 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
789 annofile.write(info_dict['annotations'])
1fb07d10
JG
790 except (KeyError, TypeError):
791 self.report_warning(u'There are no annotations to write.')
792 except (OSError, IOError):
fe7e0c98
JMF
793 self.report_error(u'Cannot write annotations file: ' + annofn)
794 return
1fb07d10 795
c4a91be7 796 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 797 self.params.get('writeautomaticsub')])
c4a91be7 798
fe7e0c98 799 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
8222d8de
JMF
800 # subtitles download errors are already managed as troubles in relevant IE
801 # that way it will silently go on when used with unsupporting IE
8222d8de 802 subtitles = info_dict['subtitles']
ca715127 803 sub_format = self.params.get('subtitlesformat', 'srt')
5d51a883
JMF
804 for sub_lang in subtitles.keys():
805 sub = subtitles[sub_lang]
6804038d
JMF
806 if sub is None:
807 continue
8222d8de 808 try:
d4051a8e 809 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
8222d8de
JMF
810 self.report_writesubtitles(sub_filename)
811 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
5d51a883 812 subfile.write(sub)
8222d8de
JMF
813 except (OSError, IOError):
814 self.report_error(u'Cannot write subtitles file ' + descfn)
815 return
816
8222d8de 817 if self.params.get('writeinfojson', False):
9771cceb 818 infofn = os.path.splitext(filename)[0] + u'.info.json'
8222d8de
JMF
819 self.report_writeinfojson(infofn)
820 try:
fe7e0c98 821 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
8222d8de
JMF
822 write_json_file(json_info_dict, encodeFilename(infofn))
823 except (OSError, IOError):
824 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
825 return
826
827 if self.params.get('writethumbnail', False):
d8269e1d 828 if info_dict.get('thumbnail') is not None:
cbdbb766 829 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
8222d8de
JMF
830 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
831 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
832 (info_dict['extractor'], info_dict['id']))
0a60edcf
JMF
833 try:
834 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
835 with open(thumb_filename, 'wb') as thumbf:
836 shutil.copyfileobj(uf, thumbf)
837 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
838 (info_dict['extractor'], info_dict['id'], thumb_filename))
839 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
840 self.report_warning(u'Unable to download thumbnail "%s": %s' %
841 (info_dict['thumbnail'], compat_str(err)))
8222d8de
JMF
842
843 if not self.params.get('skip_download', False):
844 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
845 success = True
846 else:
847 try:
848 success = self.fd._do_download(filename, info_dict)
8222d8de
JMF
849 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
850 self.report_error(u'unable to download video data: %s' % str(err))
851 return
c40c6aaa
JMF
852 except (OSError, IOError) as err:
853 raise UnavailableVideoError(err)
8222d8de
JMF
854 except (ContentTooShortError, ) as err:
855 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
856 return
857
858 if success:
859 try:
860 self.post_process(filename, info_dict)
861 except (PostProcessingError) as err:
862 self.report_error(u'postprocessing: %s' % str(err))
863 return
864
c1c9a79c
PH
865 self.record_download_archive(info_dict)
866
8222d8de
JMF
def download(self, url_list):
    """Process every URL of url_list and return the download return code.

    A SameFileError is raised up front when several URLs would all be
    written to one fixed file name, i.e. the output template contains
    no '%' field and 'max_downloads' is not 1.
    """
    if len(url_list) > 1:
        template = self.params['outtmpl']
        if '%' not in template:
            if self.params.get('max_downloads') != 1:
                raise SameFileError(template)

    for target in url_list:
        try:
            # extract_info also takes care of downloading the videos
            self.extract_info(target)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
885
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    filename -- path of the downloaded file
    ie_info  -- information dict of the video; a shallow copy extended
                with a 'filepath' entry is handed to the postprocessors

    Each postprocessor receives the info dict returned by the previous
    one, so changes made by an earlier postprocessor are visible to the
    later ones.  A PostProcessingError is reported through report_error
    and the remaining postprocessors are still run.  The original file
    is removed when the postprocessors asked for it and the 'keepvideo'
    parameter is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue

        # Chain the updated information instead of dropping it
        if new_info is not None:
            info = new_info

        if keep_video_wish is not None:
            if keep_video_wish:
                # A wish to keep the file always wins
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c 908
5db07df6
PH
909 def _make_archive_id(self, info_dict):
910 # Future-proof against any change in case
911 # and backwards compatibility with prior versions
d31209a1 912 extractor = info_dict.get('extractor_key')
7012b23c
PH
913 if extractor is None:
914 if 'id' in info_dict:
915 extractor = info_dict.get('ie_key') # key in a playlist
916 if extractor is None:
5db07df6
PH
917 return None # Incomplete video information
918 return extractor.lower() + u' ' + info_dict['id']
919
def in_download_archive(self, info_dict):
    """Tell whether the video of info_dict is listed in the download archive.

    False is returned when no archive is configured, when no archive
    entry can be built for info_dict, or when the archive file does not
    exist.  Any other IOError is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    entry = self._make_archive_id(info_dict)
    if entry is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(row.strip() == entry for row in archive_file):
                return True
    except IOError as ioe:
        # Only a missing archive file is tolerated
        if ioe.errno != errno.ENOENT:
            raise
    return False
938
def record_download_archive(self, info_dict):
    """Append the archive entry of info_dict to the download archive.

    Nothing happens when the 'download_archive' parameter is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + u'\n')
dd82ffea 947
8c51aa65 948 @staticmethod
8abeeb94 949 def format_resolution(format, default='unknown'):
fb04e403
PH
950 if format.get('vcodec') == 'none':
951 return 'audio only'
57dd9a8f
PH
952 if format.get('_resolution') is not None:
953 return format['_resolution']
8c51aa65
JMF
954 if format.get('height') is not None:
955 if format.get('width') is not None:
956 res = u'%sx%s' % (format['width'], format['height'])
957 else:
958 res = u'%sp' % format['height']
959 else:
8abeeb94 960 res = default
8c51aa65
JMF
961 return res
962
def list_formats(self, info_dict):
    """Print a table of the formats available for the video of info_dict.

    The formats are taken from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format.  With more
    than one format the first row is tagged '(worst)' and the last one
    '(best)'.  An empty format list prints the header only.
    """
    def format_note(fdict):
        # Free-text column: note, video codec/bitrate, audio
        # codec/bitrate and file size, as far as they are known
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # '*' takes the width of the format code column from the arguments
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # Seed with the header width so that an empty format list does not
    # make max() fail
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
dca08720
PH
1012
def urlopen(self, req):
    """ Open req with the opener of this instance and return the result """
    opener = self._opener
    return opener.open(req)
1016
def print_debug_header(self):
    """Write debugging information when the 'verbose' parameter is set.

    The output consists of the program version, the Git revision of
    the source directory when it can be determined, the Python version
    with the platform name, and the proxies of the opener handlers.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out = sp.communicate()[0]
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The Git revision is best effort only (git may be missing, the
        # sources may not be a checkout); KeyboardInterrupt and
        # SystemExit are deliberately not swallowed
        try:
            # Only available on Python 2
            sys.exc_clear()
        except Exception:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1043
def _setup_opener(self):
    """Build the URL opener of this instance from the parameters.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' parameters, stores the cookie jar in
    self.cookiejar and the opener in self._opener, installs the opener
    globally and sets the default socket timeout.
    """
    # Fall back to 600 when no timeout was given
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    # Without a cookie file a plain CookieJar is used; otherwise a
    # MozillaCookieJar bound to that file
    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the file when it is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An empty proxy string yields an empty proxy map
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    # Both calls below change process-wide state, not just this instance
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)