]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[soundcloud] Prefer HTTP over RTMP (#1798)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
c1c9a79c 6import errno
8222d8de 7import io
8694c600 8import json
8222d8de 9import os
dca08720 10import platform
8222d8de
JMF
11import re
12import shutil
dca08720 13import subprocess
8222d8de
JMF
14import socket
15import sys
16import time
17import traceback
18
1e5b9a95
PH
19if os.name == 'nt':
20 import ctypes
21
ce02ed60 22from .utils import (
dca08720 23 compat_cookiejar,
ce02ed60
PH
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
02dbf93f 36 format_bytes,
ce02ed60 37 locked_file,
dca08720 38 make_HTTPS_handler,
ce02ed60
PH
39 MaxDownloadsReached,
40 PostProcessingError,
dca08720 41 platform_name,
ce02ed60
PH
42 preferredencoding,
43 SameFileError,
44 sanitize_filename,
45 subtitles_filename,
46 takewhile_inclusive,
47 UnavailableVideoError,
48 write_json_file,
49 write_string,
dca08720 50 YoutubeDLHandler,
ce02ed60 51)
023fa8c4 52from .extractor import get_info_extractor, gen_extractors
8222d8de 53from .FileDownloader import FileDownloader
dca08720 54from .version import __version__
8222d8de
JMF
55
56
57class YoutubeDL(object):
58 """YoutubeDL class.
59
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
66
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object handles it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL process the extracted information, possibly using a File
73 Downloader to download the video.
74
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
81
82 Available options:
83
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
c6c19746 86 videopassword: Password for accessing a video.
8222d8de
JMF
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
8694c600 96 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
8bf9319e 108 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 112 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
b004821f 115 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 116 allsubtitles: Downloads all the subtitles of the video
0b7f3118 117 (requires writesubtitles or writeautomaticsub)
8222d8de 118 listsubtitles: Lists all available subtitles for the video
b98a6b2f 119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 120 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
c35f9e72 124 cachedir: Location of the cache files in the filesystem.
c3c88a26 125 None to disable filesystem cache.
47192f92 126 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
c1c9a79c
PH
129 downloadarchive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
131 again.
dca08720 132 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8
PH
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
fe7e0c98 135
8222d8de
JMF
136 The following parameters are not used by YoutubeDL itself, they are used by
137 the FileDownloader:
138 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
139 noresizebuffer, retries, continuedl, noprogress, consoletitle
140 """
141
142 params = None
143 _ies = []
144 _pps = []
145 _download_retcode = None
146 _num_downloads = None
147 _screen_file = None
148
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is stored as self.params and may be modified in place:
    'restrictfilenames' is switched on when, on Python 3 and outside
    Windows, the filesystem encoding is ASCII-only.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    # Must be assigned before any report_*() call below: to_stderr() reads
    # self.params, which is still the class-level None until this point.
    self.params = params

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames')):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all characters. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params['outtmpl']:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
176
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor to the list and index it by its ie_key().

    The extractor is also told that this object is its downloader.
    """
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
182
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, a new one is created from the
    class returned by the module-level get_info_extractor(), added to the
    extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
194
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order,
    at the end of the extractor list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
201
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
206
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file unless in quiet mode.

    When a 'logger' option is set, the message goes to logger.debug()
    instead. skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
8222d8de
JMF
215
def to_stderr(self, message):
    """Print message to stderr.

    message must be a unicode string. When a 'logger' option is set, the
    message goes to logger.error() instead; otherwise it is written to
    sys.stderr followed by a newline.
    """
    assert isinstance(message, type(u''))
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        output = message + u'\n'
        # Inspect the stream that is actually written to (sys.stderr).
        # Python 2 lies about the mode of sys.stdout/sys.stderr, so always
        # encode there.
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
            output = output.encode(preferredencoding())
        sys.stderr.write(output)
8222d8de 226
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console/terminal title to message when 'consoletitle' is enabled.

    On Windows with a console window the Win32 API is used; otherwise, if
    TERM is set, an escape sequence is written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    has_win_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_win_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
1e5b9a95 236
bdde425c
PH
def save_console_title(self):
    """Push the current terminal title on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
bdde425c
PH
243
def restore_console_title(self):
    """Pop the terminal title saved by save_console_title().

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
bdde425c
PH
250
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    saver = self.save_console_title
    saver()
    return self
254
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookie jar.

    The cookie jar is only saved when a 'cookiefile' option is set.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
bdde425c 260
8222d8de
JMF
def fixed_template(self):
    """Return True when the output template contains no %(...)s field."""
    field = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return field is None
264
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    The message, if given, is printed to stderr. In verbose mode a
    traceback is printed too: tb when supplied, otherwise one built from
    the exception currently being handled, or from the current call stack
    when no exception is active.

    Unless the 'ignoreerrors' option is set, DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if not sys.exc_info()[0]:
                # Not called from an except block: show the call stack
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
            else:
                tb = u''
                current = sys.exc_info()[1]
                # Some exceptions carry the exc_info of their original cause
                if hasattr(current, 'exc_info') and current.exc_info[0]:
                    tb += u''.join(traceback.format_exception(*current.exc_info))
                tb += compat_str(traceback.format_exc())
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = sys.exc_info()
    if exc_info[0] and hasattr(exc_info[1], 'exc_info') and exc_info[1].exc_info[0]:
        exc_info = exc_info[1].exc_info
    raise DownloadError(message, exc_info)
294
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (prefix, message))
306
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed with 'ERROR:'.
    The prefix is colored in red when stderr is a tty and the OS is not
    Windows. tb is passed through to trouble().
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;31mERROR:\033[0m' if colorize else u'ERROR:'
    self.trouble(u'%s %s' % (prefix, message), tb)
318
8222d8de
JMF
def report_writedescription(self, descfn):
    """ Announce on screen that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
322
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
326
def report_writeinfojson(self, infofn):
    """ Announce on screen the JSON metadata file name infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
330
1fb07d10
JG
def report_writeannotations(self, annofn):
    """ Announce on screen that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
334
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    Falls back to a generic message when the name cannot be encoded.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
341
def increment_downloads(self):
    """Advance the download counter (used for the 'autonumber' field) by one."""
    self._num_downloads = self._num_downloads + 1
345
def prepare_filename(self, info_dict):
    """Generate the output filename from the 'outtmpl' template.

    A copy of info_dict is extended with 'epoch' and 'autonumber', every
    value is sanitized, and the result is substituted into the template.
    Returns None (after reporting an error) when the template refers to
    an unknown field or cannot be formatted.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def _clean(key, value):
            # Missing values are rendered as NA in the file name
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(text, restricted=restricted, is_id=(key == u'id'))

        fields = dict((key, _clean(key, value)) for key, value in fields.items())

        template = os.path.expanduser(self.params['outtmpl'])
        return template % fields
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
376
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a human-readable reason for skipping the entry.
    Callers prepend the '[download] ' prefix, so reasons do not carry it.
    Checks, in order: 'matchtitle' / 'rejecttitle' patterns (only when the
    entry has a title), 'daterange', 'age_limit' and the download archive.
    """

    if 'title' in info_dict:
        # The title can be missing when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        # A missing or None video age limit means "no restriction"
        video_age_limit = info_dict.get('age_limit') or 0
        if age_limit < video_age_limit:
            # The entry may have no title; fall back to its id
            name = info_dict.get('title', info_dict.get('id', u'video'))
            return u'Skipping "' + name + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % info_dict.get('title', info_dict.get('id', u'video')))
    return None
fe7e0c98 404
b6c45014
JMF
405 @staticmethod
406 def add_extra_info(info_dict, extra_info):
407 '''Set the keys from extra_info in info dict if they are missing'''
408 for key, value in extra_info.items():
409 info_dict.setdefault(key, value)
410
8222d8de
JMF
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url with the first suitable InfoExtractor
    and return the processed result (see process_ie_result).
    If 'download', also downloads the videos.
    ie_key, when given, selects the extractor to use instead of trying
    all the registered ones.
    extra_info is a dict containing the extra values to add to each result
    (defaults to an empty dict).
    Returns None when extraction failed, was already finished by the
    extractor, or no extractor is suitable.
    '''
    if extra_info is None:
        # A fresh dict per call instead of a shared mutable default
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'extractor_key': ie.ie_key(),
                })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 459
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of extra values added to each video result
    (defaults to an empty dict).
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        # A fresh dict per call instead of a shared mutable default
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':

        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based, slicing is 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style entries inherit the extractor data of the list
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
539
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats according to format_spec.

    available_formats is expected to be ordered from worst to best.
    format_spec may be 'best' (or None), 'worst', a file extension
    (mp4, flv, webm, 3gp) or a format_id; for the last two the best
    matching format is returned.
    Returns None when nothing matches or no formats are available.
    """
    if not available_formats:
        # Nothing to choose from; avoids an IndexError for best/worst
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
555
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video result and optionally download.

    info_dict is a 'video' result; it is completed in place (playlist
    fields, per-format 'format_id' / 'format' / 'ext') and, on return,
    updated with the last (best) selected format.
    Returns info_dict, or None when the 'listformats' option is set.
    Raises ExtractorError when none of the requested formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first; None cannot be compared with
            # ints on Python 3, so map it to -1.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
640
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Completes info_dict ('fulltitle', truncated 'title', 'stitle',
    'format'), applies the skip filters and the 'max_downloads' limit,
    performs the forced printings, and, unless simulating, writes the
    requested side files (description, annotations, subtitles, info JSON,
    thumbnail), downloads the video, runs the post-processors and records
    the download in the archive.

    Raises MaxDownloadsReached when the download limit is exceeded and
    UnavailableVideoError when the download fails with an OS/IO error.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try so the error message can name the file
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            # splitext only strips a real extension of the last path
            # component, unlike rpartition('.')
            thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                    (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % compat_str(err))
                return

    self.record_download_archive(info_dict)
798
8222d8de
JMF
def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    Raises SameFileError when more than one URL is given and
    self.fixed_template() is true. MaxDownloadsReached is announced on
    screen and then re-raised; UnavailableVideoError is reported and the
    loop carries on with the next URL.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for video_url in url_list:
        try:
            # Extracting the info also performs the download itself
            self.extract_info(video_url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
815
def post_process(self, filename, ie_info):
    """Run every postprocessor in self._pps on the given file.

    Each postprocessor receives a copy of ie_info extended with a
    'filepath' entry. Their "keep video" wishes are combined: a true wish
    always wins, a false wish only counts while nothing has been decided.
    If the outcome is exactly False and the 'keepvideo' param is not set,
    the original file is removed.
    """
    pp_info = dict(ie_info)
    pp_info['filepath'] = filename

    keep_video = None
    for postprocessor in self._pps:
        try:
            wish, _ignored_info = postprocessor.run(pp_info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            # This postprocessor has no opinion
            continue
        if wish or keep_video is None:
            keep_video = wish

    delete_requested = keep_video is False
    if not delete_requested or self.params.get('keepvideo', False):
        return

    try:
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c 838
5db07df6
PH
def _make_archive_id(self, info_dict):
    """Return the download-archive key for info_dict, or None.

    The key is the lowercased extractor name, a space, and the video id.
    The extractor name is taken from 'extractor', falling back to
    'ie_key' (the key used for playlist entries). None is returned when
    either the id or the extractor name is unavailable, i.e. when the
    video information is incomplete.
    """
    video_id = info_dict.get('id')
    if video_id is None:
        # Incomplete video information: without an id there is no key,
        # whatever the extractor fields say.
        return None

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + video_id
849
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is in the archive.

    Returns False when no 'download_archive' param is set, when no archive
    id can be built for info_dict, or when the archive file does not
    exist. Any other IOError while reading the archive is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted_id = self._make_archive_id(info_dict)
    if wanted_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted_id for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
868
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' param is not set. The
    archive id is expected to be available at this point (asserted).
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + u'\n')
dd82ffea 877
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a printable resolution for a format dictionary.

    A non-None '_resolution' entry is returned as is. Otherwise the
    result is u'<width>x<height>' when both dimensions are known,
    u'<height>p' when only the height is known, and default when the
    height is missing.
    """
    preset = format.get('_resolution')
    if preset is not None:
        return preset

    height = format.get('height')
    if height is None:
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
890
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are read from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format. Each row
    shows the format id, the extension, the resolution (as computed by
    self.format_resolution) and a free-form note. When there is more
    than one format, the first row is tagged '(worst)' and the last
    '(best)'. An empty format list prints the header only.
    """
    def format_note(fdict):
        # Assemble the "note" column from whichever codec, bitrate and
        # size fields are present in fdict.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if fdict.get('vcodec') is not None:
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # '*' takes the pad width of the id column from the argument list
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # The id column is at least as wide as its header; seeding the list
    # with the header width keeps max() valid for an empty format list.
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
dca08720
PH
939
def urlopen(self, req):
    """Open req through this object's URL opener and return the result."""
    opener = self._opener
    return opener.open(req)
943
def print_debug_header(self):
    """Write version and environment details through write_string.

    Does nothing unless the 'verbose' param is set. Otherwise it reports
    the program version, the short git HEAD hash when it can be obtained,
    the Python version and platform, and the proxies configured on the
    handlers of self._opener.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git lookup is best effort (git may be missing, or this may
        # not be a checkout). Only ordinary errors are ignored, so
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() only exists on Python 2
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
970
def _setup_opener(self, timeout=20):
    """Build the URL opener from the 'cookiefile', 'proxy' and
    'nocheckcertificate' params and store it in self._opener.

    Also sets self.cookiejar, installs the opener globally and sets the
    default socket timeout to timeout.
    """
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(opts_cookiefile)
        # Only load the cookie file when it is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty 'proxy' param yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    no_check_certificate = self.params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(no_check_certificate)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)