]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Move the opener to the YoutubeDL object.
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
c1c9a79c 6import errno
8222d8de 7import io
8694c600 8import json
8222d8de 9import os
dca08720 10import platform
8222d8de
JMF
11import re
12import shutil
dca08720 13import subprocess
8222d8de
JMF
14import socket
15import sys
16import time
17import traceback
18
1e5b9a95
PH
19if os.name == 'nt':
20 import ctypes
21
ce02ed60 22from .utils import (
dca08720 23 compat_cookiejar,
ce02ed60
PH
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 locked_file,
dca08720 37 make_HTTPS_handler,
ce02ed60
PH
38 MaxDownloadsReached,
39 PostProcessingError,
dca08720 40 platform_name,
ce02ed60
PH
41 preferredencoding,
42 SameFileError,
43 sanitize_filename,
44 subtitles_filename,
45 takewhile_inclusive,
46 UnavailableVideoError,
47 write_json_file,
48 write_string,
dca08720 49 YoutubeDLHandler,
ce02ed60 50)
023fa8c4 51from .extractor import get_info_extractor, gen_extractors
8222d8de 52from .FileDownloader import FileDownloader
dca08720 53from .version import __version__
8222d8de
JMF
54
55
56class YoutubeDL(object):
57 """YoutubeDL class.
58
59 YoutubeDL objects are the ones responsible of downloading the
60 actual video file and writing it to disk if the user has requested
61 it, among some other tasks. In most cases there should be one per
62 program. As, given a video URL, the downloader doesn't know how to
63 extract all the needed information, task that InfoExtractors do, it
64 has to pass the URL to one of them.
65
66 For this, YoutubeDL objects have a method that allows
67 InfoExtractors to be registered in a given order. When it is passed
68 a URL, the YoutubeDL object handles it to the first InfoExtractor it
69 finds that reports being able to handle it. The InfoExtractor extracts
70 all the information about the video or videos the URL refers to, and
71 YoutubeDL process the extracted information, possibly using a File
72 Downloader to download the video.
73
74 YoutubeDL objects accept a lot of parameters. In order not to saturate
75 the object constructor with arguments, it receives a dictionary of
76 options instead. These options are available through the params
77 attribute for the InfoExtractors to use. The YoutubeDL also
78 registers itself as the downloader in charge for the InfoExtractors
79 that are added to it, so this is a "mutual registration".
80
81 Available options:
82
83 username: Username for authentication purposes.
84 password: Password for authentication purposes.
c6c19746 85 videopassword: Password for accessing a video.
8222d8de
JMF
86 usenetrc: Use netrc for authentication instead.
87 verbose: Print additional info to stdout.
88 quiet: Do not print messages to stdout.
89 forceurl: Force printing final URL.
90 forcetitle: Force printing title.
91 forceid: Force printing ID.
92 forcethumbnail: Force printing thumbnail URL.
93 forcedescription: Force printing description.
94 forcefilename: Force printing final filename.
8694c600 95 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
96 simulate: Do not download the video files.
97 format: Video format code.
98 format_limit: Highest quality format to try.
99 outtmpl: Template for output names.
100 restrictfilenames: Do not allow "&" and spaces in file names
101 ignoreerrors: Do not stop on download errors.
102 nooverwrites: Prevent overwriting files.
103 playliststart: Playlist item to start at.
104 playlistend: Playlist item to end at.
105 matchtitle: Download only matching titles.
106 rejecttitle: Reject downloads for matching titles.
107 logtostderr: Log messages to stderr instead of stdout.
108 writedescription: Write the video description to a .description file
109 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 110 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
111 writethumbnail: Write the thumbnail image to a file
112 writesubtitles: Write the video subtitles to a file
b004821f 113 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 114 allsubtitles: Downloads all the subtitles of the video
0b7f3118 115 (requires writesubtitles or writeautomaticsub)
8222d8de 116 listsubtitles: Lists all available subtitles for the video
b98a6b2f 117 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 118 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
119 keepvideo: Keep the video file after post-processing
120 daterange: A DateRange object, download only if the upload_date is in the range.
121 skip_download: Skip the actual download of the video file
c35f9e72 122 cachedir: Location of the cache files in the filesystem.
c3c88a26 123 None to disable filesystem cache.
47192f92 124 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
125 age_limit: An integer representing the user's age in years.
126 Unsuitable videos for the given age are skipped.
c1c9a79c
PH
127 download_archive: File name of a file where all downloads are recorded.
128 Videos already present in the file are not downloaded
129 again.
dca08720
PH
130 cookiefile: File name where cookies should be read from and dumped to.
131 nocheckcertificate: Do not verify SSL certificates
fe7e0c98 132
8222d8de
JMF
133 The following parameters are not used by YoutubeDL itself, they are used by
134 the FileDownloader:
135 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
136 noresizebuffer, retries, continuedl, noprogress, consoletitle
137 """
138
139 params = None
140 _ies = []
141 _pps = []
142 _download_retcode = None
143 _num_downloads = None
144 _screen_file = None
145
146 def __init__(self, params):
147 """Create a FileDownloader object with the given options."""
148 self._ies = []
56c73665 149 self._ies_instances = {}
8222d8de
JMF
150 self._pps = []
151 self._progress_hooks = []
152 self._download_retcode = 0
153 self._num_downloads = 0
154 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
34308b30
PH
155
156 if (sys.version_info >= (3,) and sys.platform != 'win32' and
157 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
158 and not params['restrictfilenames']):
159 # On Python 3, the Unicode filesystem API will throw errors (#1474)
160 self.report_warning(
1d368c75 161 u'Assuming --restrict-filenames since file system encoding '
34308b30
PH
162 u'cannot encode all charactes. '
163 u'Set the LC_ALL environment variable to fix this.')
164 params['restrictfilenames'] = True
165
8222d8de
JMF
166 self.params = params
167 self.fd = FileDownloader(self, self.params)
168
169 if '%(stitle)s' in self.params['outtmpl']:
170 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
171
dca08720
PH
172 self._setup_opener()
173
8222d8de
JMF
174 def add_info_extractor(self, ie):
175 """Add an InfoExtractor object to the end of the list."""
176 self._ies.append(ie)
56c73665 177 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
178 ie.set_downloader(self)
179
56c73665
JMF
180 def get_info_extractor(self, ie_key):
181 """
182 Get an instance of an IE with name ie_key, it will try to get one from
183 the _ies list, if there's no instance it will create a new one and add
184 it to the extractor list.
185 """
186 ie = self._ies_instances.get(ie_key)
187 if ie is None:
188 ie = get_info_extractor(ie_key)()
189 self.add_info_extractor(ie)
190 return ie
191
023fa8c4
JMF
192 def add_default_info_extractors(self):
193 """
194 Add the InfoExtractors returned by gen_extractors to the end of the list
195 """
196 for ie in gen_extractors():
197 self.add_info_extractor(ie)
198
8222d8de
JMF
199 def add_post_processor(self, pp):
200 """Add a PostProcessor object to the end of the chain."""
201 self._pps.append(pp)
202 pp.set_downloader(self)
203
204 def to_screen(self, message, skip_eol=False):
205 """Print message to stdout if not in quiet mode."""
8222d8de
JMF
206 if not self.params.get('quiet', False):
207 terminator = [u'\n', u''][skip_eol]
208 output = message + terminator
7459e3a2 209 write_string(output, self._screen_file)
8222d8de
JMF
210
211 def to_stderr(self, message):
212 """Print message to stderr."""
213 assert type(message) == type(u'')
214 output = message + u'\n'
215 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
216 output = output.encode(preferredencoding())
217 sys.stderr.write(output)
218
1e5b9a95
PH
219 def to_console_title(self, message):
220 if not self.params.get('consoletitle', False):
221 return
222 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
223 # c_wchar_p() might not be necessary if `message` is
224 # already of type unicode()
225 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
226 elif 'TERM' in os.environ:
749febf4 227 write_string(u'\033]0;%s\007' % message, self._screen_file)
1e5b9a95 228
bdde425c
PH
229 def save_console_title(self):
230 if not self.params.get('consoletitle', False):
231 return
232 if 'TERM' in os.environ:
efd6c574
JMF
233 # Save the title on stack
234 write_string(u'\033[22;0t', self._screen_file)
bdde425c
PH
235
236 def restore_console_title(self):
237 if not self.params.get('consoletitle', False):
238 return
239 if 'TERM' in os.environ:
efd6c574
JMF
240 # Restore the title from stack
241 write_string(u'\033[23;0t', self._screen_file)
bdde425c
PH
242
243 def __enter__(self):
244 self.save_console_title()
245 return self
246
247 def __exit__(self, *args):
248 self.restore_console_title()
dca08720
PH
249
250 if self.params.get('cookiefile') is not None:
251 self.cookiejar.save()
bdde425c 252
8222d8de
JMF
253 def fixed_template(self):
254 """Checks if the output template is fixed."""
255 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
256
257 def trouble(self, message=None, tb=None):
258 """Determine action to take when a download problem appears.
259
260 Depending on if the downloader has been configured to ignore
261 download errors or not, this method may throw an exception or
262 not when errors are found, after printing the message.
263
264 tb, if given, is additional traceback information.
265 """
266 if message is not None:
267 self.to_stderr(message)
268 if self.params.get('verbose'):
269 if tb is None:
270 if sys.exc_info()[0]: # if .trouble has been called from an except block
271 tb = u''
272 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
273 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
274 tb += compat_str(traceback.format_exc())
275 else:
276 tb_data = traceback.format_list(traceback.extract_stack())
277 tb = u''.join(tb_data)
278 self.to_stderr(tb)
279 if not self.params.get('ignoreerrors', False):
280 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
281 exc_info = sys.exc_info()[1].exc_info
282 else:
283 exc_info = sys.exc_info()
284 raise DownloadError(message, exc_info)
285 self._download_retcode = 1
286
287 def report_warning(self, message):
288 '''
289 Print the message to stderr, it will be prefixed with 'WARNING:'
290 If stderr is a tty file the 'WARNING:' will be colored
291 '''
292 if sys.stderr.isatty() and os.name != 'nt':
fe7e0c98 293 _msg_header = u'\033[0;33mWARNING:\033[0m'
8222d8de 294 else:
fe7e0c98
JMF
295 _msg_header = u'WARNING:'
296 warning_message = u'%s %s' % (_msg_header, message)
8222d8de
JMF
297 self.to_stderr(warning_message)
298
299 def report_error(self, message, tb=None):
300 '''
301 Do the same as trouble, but prefixes the message with 'ERROR:', colored
302 in red if stderr is a tty file.
303 '''
304 if sys.stderr.isatty() and os.name != 'nt':
305 _msg_header = u'\033[0;31mERROR:\033[0m'
306 else:
307 _msg_header = u'ERROR:'
308 error_message = u'%s %s' % (_msg_header, message)
309 self.trouble(error_message, tb)
310
8222d8de
JMF
311 def report_writedescription(self, descfn):
312 """ Report that the description file is being written """
313 self.to_screen(u'[info] Writing video description to: ' + descfn)
314
315 def report_writesubtitles(self, sub_filename):
316 """ Report that the subtitles file is being written """
317 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
318
319 def report_writeinfojson(self, infofn):
320 """ Report that the metadata file has been written """
321 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
322
1fb07d10
JG
323 def report_writeannotations(self, annofn):
324 """ Report that the annotations file has been written. """
325 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
326
8222d8de
JMF
327 def report_file_already_downloaded(self, file_name):
328 """Report file has already been fully downloaded."""
329 try:
330 self.to_screen(u'[download] %s has already been downloaded' % file_name)
ce02ed60 331 except UnicodeEncodeError:
8222d8de
JMF
332 self.to_screen(u'[download] The file has already been downloaded')
333
334 def increment_downloads(self):
335 """Increment the ordinal that assigns a number to each file."""
336 self._num_downloads += 1
337
338 def prepare_filename(self, info_dict):
339 """Generate the output filename."""
340 try:
341 template_dict = dict(info_dict)
342
343 template_dict['epoch'] = int(time.time())
344 autonumber_size = self.params.get('autonumber_size')
345 if autonumber_size is None:
346 autonumber_size = 5
347 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
348 template_dict['autonumber'] = autonumber_templ % self._num_downloads
702665c0 349 if template_dict.get('playlist_index') is not None:
8222d8de
JMF
350 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
351
586a91b6 352 sanitize = lambda k, v: sanitize_filename(
8222d8de
JMF
353 u'NA' if v is None else compat_str(v),
354 restricted=self.params.get('restrictfilenames'),
586a91b6
PH
355 is_id=(k == u'id'))
356 template_dict = dict((k, sanitize(k, v))
357 for k, v in template_dict.items())
8222d8de 358
586a91b6
PH
359 tmpl = os.path.expanduser(self.params['outtmpl'])
360 filename = tmpl % template_dict
8222d8de
JMF
361 return filename
362 except KeyError as err:
363 self.report_error(u'Erroneous output template')
364 return None
365 except ValueError as err:
4efba05c 366 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
367 return None
368
369 def _match_entry(self, info_dict):
370 """ Returns None iff the file should be downloaded """
371
372 title = info_dict['title']
373 matchtitle = self.params.get('matchtitle', False)
374 if matchtitle:
375 if not re.search(matchtitle, title, re.IGNORECASE):
376 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
377 rejecttitle = self.params.get('rejecttitle', False)
378 if rejecttitle:
379 if re.search(rejecttitle, title, re.IGNORECASE):
380 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
381 date = info_dict.get('upload_date', None)
382 if date is not None:
383 dateRange = self.params.get('daterange', DateRange())
384 if date not in dateRange:
385 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
8dbe9899
PH
386 age_limit = self.params.get('age_limit')
387 if age_limit is not None:
cfadd183 388 if age_limit < info_dict.get('age_limit', 0):
8dbe9899 389 return u'Skipping "' + title + '" because it is age restricted'
c1c9a79c 390 if self.in_download_archive(info_dict):
ee6c9f95 391 return (u'%(title)s has already been recorded in archive'
c1c9a79c 392 % info_dict)
8222d8de 393 return None
fe7e0c98 394
b6c45014
JMF
395 @staticmethod
396 def add_extra_info(info_dict, extra_info):
397 '''Set the keys from extra_info in info dict if they are missing'''
398 for key, value in extra_info.items():
399 info_dict.setdefault(key, value)
400
8222d8de
JMF
401 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
402 '''
403 Returns a list with a dictionary for each video we find.
404 If 'download', also downloads the videos.
405 extra_info is a dict containing the extra values to add to each result
406 '''
fe7e0c98 407
8222d8de 408 if ie_key:
56c73665 409 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
410 else:
411 ies = self._ies
412
413 for ie in ies:
414 if not ie.suitable(url):
415 continue
416
417 if not ie.working():
418 self.report_warning(u'The program functionality for this site has been marked as broken, '
419 u'and will probably not work.')
420
421 try:
422 ie_result = ie.extract(url)
423 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
424 break
425 if isinstance(ie_result, list):
426 # Backwards compatibility: old IE result format
8222d8de
JMF
427 ie_result = {
428 '_type': 'compat_list',
429 'entries': ie_result,
430 }
9103bbc5
JMF
431 self.add_extra_info(ie_result,
432 {
433 'extractor': ie.IE_NAME,
be97abc2
JMF
434 'webpage_url': url,
435 'extractor_key': ie.ie_key(),
9103bbc5 436 })
b6c45014 437 return self.process_ie_result(ie_result, download, extra_info)
8222d8de
JMF
438 except ExtractorError as de: # An error we somewhat expected
439 self.report_error(compat_str(de), de.format_traceback())
440 break
441 except Exception as e:
442 if self.params.get('ignoreerrors', False):
443 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
444 break
445 else:
446 raise
447 else:
448 self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 449
8222d8de
JMF
450 def process_ie_result(self, ie_result, download=True, extra_info={}):
451 """
452 Take the result of the ie(may be modified) and resolve all unresolved
453 references (URLs, playlist items).
454
455 It will also download the videos if 'download'.
456 Returns the resolved ie_result.
457 """
458
459 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
460 if result_type == 'video':
b6c45014 461 self.add_extra_info(ie_result, extra_info)
feee2ecf 462 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
463 elif result_type == 'url':
464 # We have to add extra_info to the results because it may be
465 # contained in a playlist
466 return self.extract_info(ie_result['url'],
467 download,
468 ie_key=ie_result.get('ie_key'),
469 extra_info=extra_info)
470 elif result_type == 'playlist':
b6c45014 471 self.add_extra_info(ie_result, extra_info)
8222d8de
JMF
472 # We process each entry in the playlist
473 playlist = ie_result.get('title', None) or ie_result.get('id', None)
fe7e0c98 474 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
475
476 playlist_results = []
477
478 n_all_entries = len(ie_result['entries'])
479 playliststart = self.params.get('playliststart', 1) - 1
480 playlistend = self.params.get('playlistend', -1)
481
482 if playlistend == -1:
483 entries = ie_result['entries'][playliststart:]
484 else:
485 entries = ie_result['entries'][playliststart:playlistend]
486
487 n_entries = len(entries)
488
489 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
490 (ie_result['extractor'], playlist, n_all_entries, n_entries))
491
fe7e0c98
JMF
492 for i, entry in enumerate(entries, 1):
493 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
8222d8de 494 extra = {
fe7e0c98
JMF
495 'playlist': playlist,
496 'playlist_index': i + playliststart,
b6c45014 497 'extractor': ie_result['extractor'],
9103bbc5 498 'webpage_url': ie_result['webpage_url'],
be97abc2 499 'extractor_key': ie_result['extractor_key'],
fe7e0c98 500 }
8222d8de
JMF
501 entry_result = self.process_ie_result(entry,
502 download=download,
503 extra_info=extra)
504 playlist_results.append(entry_result)
505 ie_result['entries'] = playlist_results
506 return ie_result
507 elif result_type == 'compat_list':
508 def _fixup(r):
b6c45014 509 self.add_extra_info(r,
9103bbc5
JMF
510 {
511 'extractor': ie_result['extractor'],
512 'webpage_url': ie_result['webpage_url'],
be97abc2 513 'extractor_key': ie_result['extractor_key'],
9103bbc5 514 })
8222d8de
JMF
515 return r
516 ie_result['entries'] = [
b6c45014 517 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
518 for r in ie_result['entries']
519 ]
520 return ie_result
521 else:
522 raise Exception('Invalid result type: %s' % result_type)
523
a9c58ad9
JMF
524 def select_format(self, format_spec, available_formats):
525 if format_spec == 'best' or format_spec is None:
526 return available_formats[-1]
527 elif format_spec == 'worst':
528 return available_formats[0]
529 else:
49e86983
JMF
530 extensions = [u'mp4', u'flv', u'webm', u'3gp']
531 if format_spec in extensions:
532 filter_f = lambda f: f['ext'] == format_spec
533 else:
534 filter_f = lambda f: f['format_id'] == format_spec
fe7e0c98 535 matches = list(filter(filter_f, available_formats))
a9c58ad9
JMF
536 if matches:
537 return matches[-1]
538 return None
539
dd82ffea
JMF
540 def process_video_result(self, info_dict, download=True):
541 assert info_dict.get('_type', 'video') == 'video'
542
543 if 'playlist' not in info_dict:
544 # It isn't part of a playlist
545 info_dict['playlist'] = None
546 info_dict['playlist_index'] = None
547
6ff000b8 548 # This extractors handle format selection themselves
a7685f3b 549 if info_dict['extractor'] in [u'youtube', u'Youku']:
12893efe
JMF
550 if download:
551 self.process_info(info_dict)
6ff000b8
JMF
552 return info_dict
553
dd82ffea
JMF
554 # We now pick which formats have to be downloaded
555 if info_dict.get('formats') is None:
556 # There's only one format available
557 formats = [info_dict]
558 else:
559 formats = info_dict['formats']
560
561 # We check that all the formats have the format and format_id fields
562 for (i, format) in enumerate(formats):
dd82ffea 563 if format.get('format_id') is None:
8016c922 564 format['format_id'] = compat_str(i)
8c51aa65
JMF
565 if format.get('format') is None:
566 format['format'] = u'{id} - {res}{note}'.format(
567 id=format['format_id'],
568 res=self.format_resolution(format),
71934988 569 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 570 )
c1002e96
PH
571 # Automatically determine file extension if missing
572 if 'ext' not in format:
573 format['ext'] = determine_ext(format['url'])
dd82ffea
JMF
574
575 if self.params.get('listformats', None):
576 self.list_formats(info_dict)
577 return
578
99e206d5
JMF
579 format_limit = self.params.get('format_limit', None)
580 if format_limit:
f4d96df0
PH
581 formats = list(takewhile_inclusive(
582 lambda f: f['format_id'] != format_limit, formats
583 ))
e028d0d1
JMF
584 if self.params.get('prefer_free_formats'):
585 def _free_formats_key(f):
586 try:
587 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
588 except ValueError:
589 ext_ord = -1
590 # We only compare the extension if they have the same height and width
591 return (f.get('height'), f.get('width'), ext_ord)
592 formats = sorted(formats, key=_free_formats_key)
99e206d5 593
dd82ffea 594 req_format = self.params.get('format', 'best')
a9c58ad9
JMF
595 if req_format is None:
596 req_format = 'best'
dd82ffea 597 formats_to_download = []
dd82ffea 598 # The -1 is for supporting YoutubeIE
a9c58ad9 599 if req_format in ('-1', 'all'):
dd82ffea
JMF
600 formats_to_download = formats
601 else:
a9c58ad9 602 # We can accept formats requestd in the format: 34/5/best, we pick
416a5efc 603 # the first that is available, starting from left
dd82ffea
JMF
604 req_formats = req_format.split('/')
605 for rf in req_formats:
a9c58ad9
JMF
606 selected_format = self.select_format(rf, formats)
607 if selected_format is not None:
608 formats_to_download = [selected_format]
dd82ffea
JMF
609 break
610 if not formats_to_download:
78a3a9f8
PH
611 raise ExtractorError(u'requested format not available',
612 expected=True)
dd82ffea
JMF
613
614 if download:
615 if len(formats_to_download) > 1:
616 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
617 for format in formats_to_download:
618 new_info = dict(info_dict)
619 new_info.update(format)
620 self.process_info(new_info)
621 # We update the info dict with the best quality format (backwards compatibility)
622 info_dict.update(formats_to_download[-1])
623 return info_dict
624
8222d8de
JMF
625 def process_info(self, info_dict):
626 """Process a single resolved IE result."""
627
628 assert info_dict.get('_type', 'video') == 'video'
629 #We increment the download the download count here to match the previous behaviour.
630 self.increment_downloads()
631
632 info_dict['fulltitle'] = info_dict['title']
633 if len(info_dict['title']) > 200:
634 info_dict['title'] = info_dict['title'][:197] + u'...'
635
636 # Keep for backwards compatibility
637 info_dict['stitle'] = info_dict['title']
638
639 if not 'format' in info_dict:
640 info_dict['format'] = info_dict['ext']
641
642 reason = self._match_entry(info_dict)
643 if reason is not None:
644 self.to_screen(u'[download] ' + reason)
645 return
646
647 max_downloads = self.params.get('max_downloads')
648 if max_downloads is not None:
649 if self._num_downloads > int(max_downloads):
650 raise MaxDownloadsReached()
651
652 filename = self.prepare_filename(info_dict)
653
654 # Forced printings
655 if self.params.get('forcetitle', False):
656 compat_print(info_dict['title'])
657 if self.params.get('forceid', False):
658 compat_print(info_dict['id'])
659 if self.params.get('forceurl', False):
edde6c56
PH
660 # For RTMP URLs, also include the playpath
661 compat_print(info_dict['url'] + info_dict.get('play_path', u''))
216d71d0 662 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
8222d8de 663 compat_print(info_dict['thumbnail'])
216d71d0 664 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
8222d8de
JMF
665 compat_print(info_dict['description'])
666 if self.params.get('forcefilename', False) and filename is not None:
667 compat_print(filename)
668 if self.params.get('forceformat', False):
669 compat_print(info_dict['format'])
9d153818
MF
670 if self.params.get('forcejson', False):
671 compat_print(json.dumps(info_dict))
8222d8de
JMF
672
673 # Do nothing else if in simulate mode
674 if self.params.get('simulate', False):
675 return
676
677 if filename is None:
678 return
679
680 try:
681 dn = os.path.dirname(encodeFilename(filename))
682 if dn != '' and not os.path.exists(dn):
683 os.makedirs(dn)
684 except (OSError, IOError) as err:
685 self.report_error(u'unable to create directory ' + compat_str(err))
686 return
687
688 if self.params.get('writedescription', False):
689 try:
690 descfn = filename + u'.description'
691 self.report_writedescription(descfn)
692 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
693 descfile.write(info_dict['description'])
b3f0e530 694 except (KeyError, TypeError):
535f59bb 695 self.report_warning(u'There\'s no description to write.')
8222d8de
JMF
696 except (OSError, IOError):
697 self.report_error(u'Cannot write description file ' + descfn)
698 return
699
1fb07d10
JG
700 if self.params.get('writeannotations', False):
701 try:
fe7e0c98
JMF
702 annofn = filename + u'.annotations.xml'
703 self.report_writeannotations(annofn)
704 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
705 annofile.write(info_dict['annotations'])
1fb07d10
JG
706 except (KeyError, TypeError):
707 self.report_warning(u'There are no annotations to write.')
708 except (OSError, IOError):
fe7e0c98
JMF
709 self.report_error(u'Cannot write annotations file: ' + annofn)
710 return
1fb07d10 711
c4a91be7 712 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 713 self.params.get('writeautomaticsub')])
c4a91be7 714
fe7e0c98 715 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
8222d8de
JMF
716 # subtitles download errors are already managed as troubles in relevant IE
717 # that way it will silently go on when used with unsupporting IE
8222d8de 718 subtitles = info_dict['subtitles']
ca715127 719 sub_format = self.params.get('subtitlesformat', 'srt')
5d51a883
JMF
720 for sub_lang in subtitles.keys():
721 sub = subtitles[sub_lang]
6804038d
JMF
722 if sub is None:
723 continue
8222d8de 724 try:
d4051a8e 725 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
8222d8de
JMF
726 self.report_writesubtitles(sub_filename)
727 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
5d51a883 728 subfile.write(sub)
8222d8de
JMF
729 except (OSError, IOError):
730 self.report_error(u'Cannot write subtitles file ' + descfn)
731 return
732
8222d8de 733 if self.params.get('writeinfojson', False):
9771cceb 734 infofn = os.path.splitext(filename)[0] + u'.info.json'
8222d8de
JMF
735 self.report_writeinfojson(infofn)
736 try:
fe7e0c98 737 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
8222d8de
JMF
738 write_json_file(json_info_dict, encodeFilename(infofn))
739 except (OSError, IOError):
740 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
741 return
742
743 if self.params.get('writethumbnail', False):
d8269e1d 744 if info_dict.get('thumbnail') is not None:
cbdbb766 745 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
8222d8de
JMF
746 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
747 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
748 (info_dict['extractor'], info_dict['id']))
0a60edcf
JMF
749 try:
750 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
751 with open(thumb_filename, 'wb') as thumbf:
752 shutil.copyfileobj(uf, thumbf)
753 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
754 (info_dict['extractor'], info_dict['id'], thumb_filename))
755 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
756 self.report_warning(u'Unable to download thumbnail "%s": %s' %
757 (info_dict['thumbnail'], compat_str(err)))
8222d8de
JMF
758
759 if not self.params.get('skip_download', False):
760 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
761 success = True
762 else:
763 try:
764 success = self.fd._do_download(filename, info_dict)
8222d8de
JMF
765 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
766 self.report_error(u'unable to download video data: %s' % str(err))
767 return
c40c6aaa
JMF
768 except (OSError, IOError) as err:
769 raise UnavailableVideoError(err)
8222d8de
JMF
770 except (ContentTooShortError, ) as err:
771 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
772 return
773
774 if success:
775 try:
776 self.post_process(filename, info_dict)
777 except (PostProcessingError) as err:
778 self.report_error(u'postprocessing: %s' % str(err))
779 return
780
c1c9a79c
PH
781 self.record_download_archive(info_dict)
782
8222d8de
JMF
def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    Raises SameFileError when several URLs are given but the output
    template is fixed. MaxDownloadsReached is announced on screen and then
    re-raised; UnavailableVideoError is reported and the loop carries on
    with the next URL.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for current_url in url_list:
        try:
            # extract_info() also takes care of downloading the videos
            self.extract_info(current_url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
799
def post_process(self, filename, ie_info):
    """Run every registered postprocessor on the downloaded file.

    A copy of ie_info, extended with a 'filepath' entry, is handed to each
    postprocessor in self._pps. Each one returns a (keep_video_wish, info)
    pair; the wishes are combined and, if the outcome is an explicit False
    and the 'keepvideo' parameter is not set, the original file is removed.
    """
    pp_input = dict(ie_info)
    pp_input['filepath'] = filename

    keep_video = None
    for postprocessor in self._pps:
        try:
            wish, _returned_info = postprocessor.run(pp_input)
            if wish is None:
                # This postprocessor has no opinion
                continue
            # A truthy wish always wins; a falsy one only counts while
            # nothing has been decided yet.
            if wish or keep_video is None:
                keep_video = wish
        except PostProcessingError as e:
            self.report_error(e.msg)

    may_delete = not self.params.get('keepvideo', False)
    if keep_video is False and may_delete:
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c
PH
822
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    The archive path comes from the 'download_archive' parameter; without
    it the answer is always False. Entries are lines of the form
    "<extractor> <id>". A missing archive file counts as "not recorded";
    any other IOError is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # Only "file does not exist" is tolerated
        if ioe.errno != errno.ENOENT:
            raise
    return False
837
def record_download_archive(self, info_dict):
    """Append the video's "<extractor> <id>" line to the download archive.

    Does nothing when the 'download_archive' parameter is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    entry = info_dict['extractor'] + u' ' + info_dict['id']
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + u'\n')
dd82ffea 845
8c51aa65 846 @staticmethod
8abeeb94 847 def format_resolution(format, default='unknown'):
57dd9a8f
PH
848 if format.get('_resolution') is not None:
849 return format['_resolution']
8c51aa65
JMF
850 if format.get('height') is not None:
851 if format.get('width') is not None:
852 res = u'%sx%s' % (format['width'], format['height'])
853 else:
854 res = u'%sp' % format['height']
855 else:
8abeeb94 856 res = default
8c51aa65
JMF
857 return res
858
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats come from info_dict['formats'], or info_dict itself is
    treated as the only format when that key is absent. When there is more
    than one format, the first row is tagged "(worst)" and the last one
    "(best)".
    """
    def format_note(fdict):
        # An explicit note overrides the codec/bitrate summary
        explicit = fdict.get('format_note')
        if explicit is not None:
            return explicit

        vcodec, vbr = fdict.get('vcodec'), fdict.get('vbr')
        acodec, abr = fdict.get('acodec'), fdict.get('abr')

        video = u''
        if vcodec is not None:
            video += u'%-5s' % vcodec
        elif vbr is not None:
            video += u'video'
        if vbr is not None:
            video += u'@%4dk' % vbr

        audio = u''
        if acodec is not None:
            audio += u'%-5s' % acodec
        elif abr is not None:
            audio += 'audio'
        if abr is not None:
            audio += u'@%3dk' % abr

        # The separator only appears between two non-empty halves
        return u', '.join(part for part in (video, audio) if part)

    def line(format):
        columns = (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )
        return u'%-20s%-10s%-12s%s' % columns

    formats = info_dict.get('formats', [info_dict])
    formats_s = [line(f) for f in formats]
    if len(formats) > 1:
        worst_gap = ' ' if format_note(formats[0]) else ''
        formats_s[0] += worst_gap + '(worst)'
        best_gap = ' ' if format_note(formats[-1]) else ''
        formats_s[-1] += best_gap + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'})
    table = u"\n".join(formats_s)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, table))
dca08720
PH
902
def urlopen(self, req):
    """Start an HTTP download by opening req with this object's opener."""
    opener = self._opener
    return opener.open(req)
906
def print_debug_header(self):
    """Write debugging details about the runtime environment.

    Does nothing unless the 'verbose' parameter is set. Otherwise writes
    the youtube-dl version, the short git HEAD hash (best effort, only when
    git reports one), the Python version with the platform name, and the
    proxies collected from the handlers of self._opener.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _err = sp.communicate()
        out = out.decode().strip()
        # Anchor the pattern so that only output consisting solely of
        # hex digits is accepted as a revision.
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git lookup is purely informational: a missing git binary or
        # undecodable output must not abort the run. KeyboardInterrupt and
        # SystemExit are deliberately NOT swallowed here.
        try:
            # sys.exc_clear() is not available on every Python version
            sys.exc_clear()
        except AttributeError:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        # Only handlers exposing a 'proxies' attribute contribute
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
933
def _setup_opener(self, timeout=300):
    """Build the URL opener used for requests and store it on the object.

    Reads the 'cookiefile', 'proxy' and 'nocheckcertificate' parameters,
    sets self.cookiejar and self._opener, and (for now) also installs the
    opener globally and sets the default socket timeout.
    """
    cookie_path = self.params.get('cookiefile')
    proxy_option = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_option is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_option == '':
        # An explicitly empty proxy option yields an empty proxy mapping
        proxies = {}
    else:
        proxies = {'http': proxy_option, 'https': proxy_option}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    skip_cert_check = self.params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(skip_cert_check)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)