]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Incorrect variable is used to check whether thumbnail exists
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import collections
7 import errno
8 import io
9 import json
10 import os
11 import platform
12 import re
13 import shutil
14 import subprocess
15 import socket
16 import sys
17 import time
18 import traceback
19
20 if os.name == 'nt':
21 import ctypes
22
23 from .utils import (
24 compat_cookiejar,
25 compat_http_client,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 formatSeconds,
38 get_term_width,
39 locked_file,
40 make_HTTPS_handler,
41 MaxDownloadsReached,
42 PostProcessingError,
43 platform_name,
44 preferredencoding,
45 SameFileError,
46 sanitize_filename,
47 subtitles_filename,
48 takewhile_inclusive,
49 UnavailableVideoError,
50 write_json_file,
51 write_string,
52 YoutubeDLHandler,
53 )
54 from .extractor import get_info_extractor, gen_extractors
55 from .FileDownloader import FileDownloader
56 from .version import __version__
57
58
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Class-level placeholders; real per-instance values are assigned in
    # __init__ (these are never meant to be shared mutable state).
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
163
164 def __init__(self, params=None):
165 """Create a FileDownloader object with the given options."""
166 self._ies = []
167 self._ies_instances = {}
168 self._pps = []
169 self._progress_hooks = []
170 self._download_retcode = 0
171 self._num_downloads = 0
172 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
173 self._err_file = sys.stderr
174 self.params = {} if params is None else params
175
176 if params.get('bidi_workaround', False):
177 try:
178 import pty
179 master, slave = pty.openpty()
180 width = get_term_width()
181 if width is None:
182 width_args = []
183 else:
184 width_args = ['-w', str(width)]
185 self._fribidi = subprocess.Popen(
186 ['fribidi', '-c', 'UTF-8'] + width_args,
187 stdin=subprocess.PIPE,
188 stdout=slave,
189 stderr=self._err_file)
190 self._fribidi_channel = os.fdopen(master, 'rb')
191 except OSError as ose:
192 if ose.errno == 2:
193 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
194 else:
195 raise
196
197 if (sys.version_info >= (3,) and sys.platform != 'win32' and
198 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
199 and not params['restrictfilenames']):
200 # On Python 3, the Unicode filesystem API will throw errors (#1474)
201 self.report_warning(
202 u'Assuming --restrict-filenames since file system encoding '
203 u'cannot encode all charactes. '
204 u'Set the LC_ALL environment variable to fix this.')
205 self.params['restrictfilenames'] = True
206
207 self.fd = FileDownloader(self, self.params)
208
209 if '%(stitle)s' in self.params.get('outtmpl', ''):
210 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
211
212 self._setup_opener()
213
214 def add_info_extractor(self, ie):
215 """Add an InfoExtractor object to the end of the list."""
216 self._ies.append(ie)
217 self._ies_instances[ie.ie_key()] = ie
218 ie.set_downloader(self)
219
220 def get_info_extractor(self, ie_key):
221 """
222 Get an instance of an IE with name ie_key, it will try to get one from
223 the _ies list, if there's no instance it will create a new one and add
224 it to the extractor list.
225 """
226 ie = self._ies_instances.get(ie_key)
227 if ie is None:
228 ie = get_info_extractor(ie_key)()
229 self.add_info_extractor(ie)
230 return ie
231
232 def add_default_info_extractors(self):
233 """
234 Add the InfoExtractors returned by gen_extractors to the end of the list
235 """
236 for ie in gen_extractors():
237 self.add_info_extractor(ie)
238
239 def add_post_processor(self, pp):
240 """Add a PostProcessor object to the end of the chain."""
241 self._pps.append(pp)
242 pp.set_downloader(self)
243
244 def _bidi_workaround(self, message):
245 if not hasattr(self, '_fribidi_channel'):
246 return message
247
248 assert type(message) == type(u'')
249 line_count = message.count(u'\n') + 1
250 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
251 self._fribidi.stdin.flush()
252 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
253 for _ in range(line_count))
254 return res[:-len(u'\n')]
255
256 def to_screen(self, message, skip_eol=False):
257 """Print message to stdout if not in quiet mode."""
258 return self.to_stdout(message, skip_eol, check_quiet=True)
259
260 def to_stdout(self, message, skip_eol=False, check_quiet=False):
261 """Print message to stdout if not in quiet mode."""
262 if self.params.get('logger'):
263 self.params['logger'].debug(message)
264 elif not check_quiet or not self.params.get('quiet', False):
265 message = self._bidi_workaround(message)
266 terminator = [u'\n', u''][skip_eol]
267 output = message + terminator
268
269 write_string(output, self._screen_file)
270
271 def to_stderr(self, message):
272 """Print message to stderr."""
273 assert type(message) == type(u'')
274 if self.params.get('logger'):
275 self.params['logger'].error(message)
276 else:
277 message = self._bidi_workaround(message)
278 output = message + u'\n'
279 write_string(output, self._err_file)
280
281 def to_console_title(self, message):
282 if not self.params.get('consoletitle', False):
283 return
284 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
285 # c_wchar_p() might not be necessary if `message` is
286 # already of type unicode()
287 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
288 elif 'TERM' in os.environ:
289 write_string(u'\033]0;%s\007' % message, self._screen_file)
290
291 def save_console_title(self):
292 if not self.params.get('consoletitle', False):
293 return
294 if 'TERM' in os.environ:
295 # Save the title on stack
296 write_string(u'\033[22;0t', self._screen_file)
297
298 def restore_console_title(self):
299 if not self.params.get('consoletitle', False):
300 return
301 if 'TERM' in os.environ:
302 # Restore the title from stack
303 write_string(u'\033[23;0t', self._screen_file)
304
305 def __enter__(self):
306 self.save_console_title()
307 return self
308
309 def __exit__(self, *args):
310 self.restore_console_title()
311
312 if self.params.get('cookiefile') is not None:
313 self.cookiejar.save()
314
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    # If the current exception wraps another one (it exposes a
                    # populated .exc_info, as ExtractorError does), show the
                    # wrapped traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an exception handler: dump the call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped (original) exc_info when available so the
            # raised DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
344
345 def report_warning(self, message):
346 '''
347 Print the message to stderr, it will be prefixed with 'WARNING:'
348 If stderr is a tty file the 'WARNING:' will be colored
349 '''
350 if self._err_file.isatty() and os.name != 'nt':
351 _msg_header = u'\033[0;33mWARNING:\033[0m'
352 else:
353 _msg_header = u'WARNING:'
354 warning_message = u'%s %s' % (_msg_header, message)
355 self.to_stderr(warning_message)
356
357 def report_error(self, message, tb=None):
358 '''
359 Do the same as trouble, but prefixes the message with 'ERROR:', colored
360 in red if stderr is a tty file.
361 '''
362 if self._err_file.isatty() and os.name != 'nt':
363 _msg_header = u'\033[0;31mERROR:\033[0m'
364 else:
365 _msg_header = u'ERROR:'
366 error_message = u'%s %s' % (_msg_header, message)
367 self.trouble(error_message, tb)
368
369 def report_file_already_downloaded(self, file_name):
370 """Report file has already been fully downloaded."""
371 try:
372 self.to_screen(u'[download] %s has already been downloaded' % file_name)
373 except UnicodeEncodeError:
374 self.to_screen(u'[download] The file has already been downloaded')
375
376 def increment_downloads(self):
377 """Increment the ordinal that assigns a number to each file."""
378 self._num_downloads += 1
379
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Builds a template dict from info_dict (adding 'epoch',
        zero-padded 'autonumber' and 'playlist_index'), sanitizes every
        value for filesystem use, and renders the 'outtmpl' template.
        Returns None (after reporting an error) if the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            # Sanitize every value and drop None entries, so missing fields
            # fall through to the u'NA' default below.
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            template_dict = collections.defaultdict(lambda: u'NA', template_dict)

            tmpl = os.path.expanduser(self.params['outtmpl'])
            filename = tmpl % template_dict
            return filename
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
            return None
409
410 def _match_entry(self, info_dict):
411 """ Returns None iff the file should be downloaded """
412
413 video_title = info_dict.get('title', info_dict.get('id', u'video'))
414 if 'title' in info_dict:
415 # This can happen when we're just evaluating the playlist
416 title = info_dict['title']
417 matchtitle = self.params.get('matchtitle', False)
418 if matchtitle:
419 if not re.search(matchtitle, title, re.IGNORECASE):
420 return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
421 rejecttitle = self.params.get('rejecttitle', False)
422 if rejecttitle:
423 if re.search(rejecttitle, title, re.IGNORECASE):
424 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
425 date = info_dict.get('upload_date', None)
426 if date is not None:
427 dateRange = self.params.get('daterange', DateRange())
428 if date not in dateRange:
429 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
430 view_count = info_dict.get('view_count', None)
431 if view_count is not None:
432 min_views = self.params.get('min_views')
433 if min_views is not None and view_count < min_views:
434 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
435 max_views = self.params.get('max_views')
436 if max_views is not None and view_count > max_views:
437 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
438 age_limit = self.params.get('age_limit')
439 if age_limit is not None:
440 if age_limit < info_dict.get('age_limit', 0):
441 return u'Skipping "' + title + '" because it is age restricted'
442 if self.in_download_archive(info_dict):
443 return u'%s has already been recorded in archive' % video_title
444 return None
445
446 @staticmethod
447 def add_extra_info(info_dict, extra_info):
448 '''Set the keys from extra_info in info dict if they are missing'''
449 for key, value in extra_info.items():
450 info_dict.setdefault(key, value)
451
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key, if given, restricts the search to that single extractor.
        If process is False, the raw extractor result is returned without
        being resolved through process_ie_result().
        '''

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Hand the URL to the first extractor that reports being able to
        # handle it.
        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_extra_info(ie_result,
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
                        'extractor_key': ie.ie_key(),
                    })
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: the loop finished without a break, i.e. no
            # extractor was suitable for this URL.
            self.report_error(u'no suitable InfoExtractor: %s' % url)
504
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' (default), 'url',
        'url_transparent', 'playlist' or 'compat_list'; recurses until
        everything is resolved down to plain video results.
        """

        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Start from the embedding page's result but let the
                # embedded (resolved) info override the listed fields.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

            self.to_screen(
                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by title/date/view/age filters.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
609
610 def select_format(self, format_spec, available_formats):
611 if format_spec == 'best' or format_spec is None:
612 return available_formats[-1]
613 elif format_spec == 'worst':
614 return available_formats[0]
615 else:
616 extensions = [u'mp4', u'flv', u'webm', u'3gp']
617 if format_spec in extensions:
618 filter_f = lambda f: f['ext'] == format_spec
619 else:
620 filter_f = lambda f: f['format_id'] == format_spec
621 matches = list(filter(filter_f, available_formats))
622 if matches:
623 return matches[-1]
624 return None
625
    def process_video_result(self, info_dict, download=True):
        """Select which format(s) of a single video to download.

        Normalizes the format list (ids, descriptions, extensions),
        applies format_limit / prefer_free_formats / the 'format'
        request, then hands each chosen format to process_info().
        Returns info_dict updated with the best chosen format.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        # Drop everything after the format named by format_limit
        # (inclusive of the limit itself).
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                try:
                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                except ValueError:
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
710
711 def process_info(self, info_dict):
712 """Process a single resolved IE result."""
713
714 assert info_dict.get('_type', 'video') == 'video'
715 #We increment the download the download count here to match the previous behaviour.
716 self.increment_downloads()
717
718 info_dict['fulltitle'] = info_dict['title']
719 if len(info_dict['title']) > 200:
720 info_dict['title'] = info_dict['title'][:197] + u'...'
721
722 # Keep for backwards compatibility
723 info_dict['stitle'] = info_dict['title']
724
725 if not 'format' in info_dict:
726 info_dict['format'] = info_dict['ext']
727
728 reason = self._match_entry(info_dict)
729 if reason is not None:
730 self.to_screen(u'[download] ' + reason)
731 return
732
733 max_downloads = self.params.get('max_downloads')
734 if max_downloads is not None:
735 if self._num_downloads > int(max_downloads):
736 raise MaxDownloadsReached()
737
738 filename = self.prepare_filename(info_dict)
739
740 # Forced printings
741 if self.params.get('forcetitle', False):
742 self.to_stdout(info_dict['fulltitle'])
743 if self.params.get('forceid', False):
744 self.to_stdout(info_dict['id'])
745 if self.params.get('forceurl', False):
746 # For RTMP URLs, also include the playpath
747 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
748 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
749 self.to_stdout(info_dict['thumbnail'])
750 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
751 self.to_stdout(info_dict['description'])
752 if self.params.get('forcefilename', False) and filename is not None:
753 self.to_stdout(filename)
754 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
755 self.to_stdout(formatSeconds(info_dict['duration']))
756 if self.params.get('forceformat', False):
757 self.to_stdout(info_dict['format'])
758 if self.params.get('forcejson', False):
759 info_dict['_filename'] = filename
760 self.to_stdout(json.dumps(info_dict))
761
762 # Do nothing else if in simulate mode
763 if self.params.get('simulate', False):
764 return
765
766 if filename is None:
767 return
768
769 try:
770 dn = os.path.dirname(encodeFilename(filename))
771 if dn != '' and not os.path.exists(dn):
772 os.makedirs(dn)
773 except (OSError, IOError) as err:
774 self.report_error(u'unable to create directory ' + compat_str(err))
775 return
776
777 if self.params.get('writedescription', False):
778 descfn = filename + u'.description'
779 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
780 self.to_screen(u'[info] Video description is already present')
781 else:
782 try:
783 self.to_screen(u'[info] Writing video description to: ' + descfn)
784 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
785 descfile.write(info_dict['description'])
786 except (KeyError, TypeError):
787 self.report_warning(u'There\'s no description to write.')
788 except (OSError, IOError):
789 self.report_error(u'Cannot write description file ' + descfn)
790 return
791
792 if self.params.get('writeannotations', False):
793 annofn = filename + u'.annotations.xml'
794 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
795 self.to_screen(u'[info] Video annotations are already present')
796 else:
797 try:
798 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
799 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
800 annofile.write(info_dict['annotations'])
801 except (KeyError, TypeError):
802 self.report_warning(u'There are no annotations to write.')
803 except (OSError, IOError):
804 self.report_error(u'Cannot write annotations file: ' + annofn)
805 return
806
807 subtitles_are_requested = any([self.params.get('writesubtitles', False),
808 self.params.get('writeautomaticsub')])
809
810 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
811 # subtitles download errors are already managed as troubles in relevant IE
812 # that way it will silently go on when used with unsupporting IE
813 subtitles = info_dict['subtitles']
814 sub_format = self.params.get('subtitlesformat', 'srt')
815 for sub_lang in subtitles.keys():
816 sub = subtitles[sub_lang]
817 if sub is None:
818 continue
819 try:
820 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
821 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
822 self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
823 else:
824 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
825 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
826 subfile.write(sub)
827 except (OSError, IOError):
828 self.report_error(u'Cannot write subtitles file ' + descfn)
829 return
830
831 if self.params.get('writeinfojson', False):
832 infofn = os.path.splitext(filename)[0] + u'.info.json'
833 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
834 self.to_screen(u'[info] Video description metadata is already present')
835 else:
836 self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
837 try:
838 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
839 write_json_file(json_info_dict, encodeFilename(infofn))
840 except (OSError, IOError):
841 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
842 return
843
844 if self.params.get('writethumbnail', False):
845 if info_dict.get('thumbnail') is not None:
846 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
847 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
848 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
849 self.to_screen(u'[%s] %s: Thumbnail is already present' %
850 (info_dict['extractor'], info_dict['id']))
851 else:
852 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
853 (info_dict['extractor'], info_dict['id']))
854 try:
855 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
856 with open(thumb_filename, 'wb') as thumbf:
857 shutil.copyfileobj(uf, thumbf)
858 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
859 (info_dict['extractor'], info_dict['id'], thumb_filename))
860 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
861 self.report_warning(u'Unable to download thumbnail "%s": %s' %
862 (info_dict['thumbnail'], compat_str(err)))
863
864 if not self.params.get('skip_download', False):
865 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
866 success = True
867 else:
868 try:
869 success = self.fd._do_download(filename, info_dict)
870 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
871 self.report_error(u'unable to download video data: %s' % str(err))
872 return
873 except (OSError, IOError) as err:
874 raise UnavailableVideoError(err)
875 except (ContentTooShortError, ) as err:
876 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
877 return
878
879 if success:
880 try:
881 self.post_process(filename, info_dict)
882 except (PostProcessingError) as err:
883 self.report_error(u'postprocessing: %s' % str(err))
884 return
885
886 self.record_download_archive(info_dict)
887
888 def download(self, url_list):
889 """Download a given list of URLs."""
890 if (len(url_list) > 1 and
891 '%' not in self.params['outtmpl']
892 and self.params.get('max_downloads') != 1):
893 raise SameFileError(self.params['outtmpl'])
894
895 for url in url_list:
896 try:
897 #It also downloads the videos
898 self.extract_info(url)
899 except UnavailableVideoError:
900 self.report_error(u'unable to download video')
901 except MaxDownloadsReached:
902 self.to_screen(u'[info] Maximum number of downloaded files reached.')
903 raise
904
905 return self._download_retcode
906
907 def download_with_info_file(self, info_filename):
908 with io.open(info_filename, 'r', encoding='utf-8') as f:
909 info = json.load(f)
910 try:
911 self.process_ie_result(info, download=True)
912 except DownloadError:
913 webpage_url = info.get('webpage_url')
914 if webpage_url is not None:
915 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
916 return self.download([webpage_url])
917 else:
918 raise
919 return self._download_retcode
920
921 def post_process(self, filename, ie_info):
922 """Run all the postprocessors on the given file."""
923 info = dict(ie_info)
924 info['filepath'] = filename
925 keep_video = None
926 for pp in self._pps:
927 try:
928 keep_video_wish, new_info = pp.run(info)
929 if keep_video_wish is not None:
930 if keep_video_wish:
931 keep_video = keep_video_wish
932 elif keep_video is None:
933 # No clear decision yet, let IE decide
934 keep_video = keep_video_wish
935 except PostProcessingError as e:
936 self.report_error(e.msg)
937 if keep_video is False and not self.params.get('keepvideo', False):
938 try:
939 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
940 os.remove(encodeFilename(filename))
941 except (IOError, OSError):
942 self.report_warning(u'Unable to remove downloaded video file')
943
944 def _make_archive_id(self, info_dict):
945 # Future-proof against any change in case
946 # and backwards compatibility with prior versions
947 extractor = info_dict.get('extractor_key')
948 if extractor is None:
949 if 'id' in info_dict:
950 extractor = info_dict.get('ie_key') # key in a playlist
951 if extractor is None:
952 return None # Incomplete video information
953 return extractor.lower() + u' ' + info_dict['id']
954
955 def in_download_archive(self, info_dict):
956 fn = self.params.get('download_archive')
957 if fn is None:
958 return False
959
960 vid_id = self._make_archive_id(info_dict)
961 if vid_id is None:
962 return False # Incomplete video information
963
964 try:
965 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
966 for line in archive_file:
967 if line.strip() == vid_id:
968 return True
969 except IOError as ioe:
970 if ioe.errno != errno.ENOENT:
971 raise
972 return False
973
974 def record_download_archive(self, info_dict):
975 fn = self.params.get('download_archive')
976 if fn is None:
977 return
978 vid_id = self._make_archive_id(info_dict)
979 assert vid_id
980 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
981 archive_file.write(vid_id + u'\n')
982
983 @staticmethod
984 def format_resolution(format, default='unknown'):
985 if format.get('vcodec') == 'none':
986 return 'audio only'
987 if format.get('_resolution') is not None:
988 return format['_resolution']
989 if format.get('height') is not None:
990 if format.get('width') is not None:
991 res = u'%sx%s' % (format['width'], format['height'])
992 else:
993 res = u'%sp' % format['height']
994 else:
995 res = default
996 return res
997
    def list_formats(self, info_dict):
        """Print a table of the available formats for this video to the screen."""

        def format_note(fdict):
            # Build the free-text "note" column: extractor note, codecs,
            # bitrates and filesize, comma-separated where both audio and
            # video pieces are present.
            res = u''
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
                # No codec name known, but a video bitrate implies video.
                res += u'video'
            if fdict.get('vbr') is not None:
                res += u'@%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if res:
                    res += u', '
                res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
                if res:
                    res += u', '
                res += 'audio'
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                if res:
                    res += u', '
                res += format_bytes(fdict['filesize'])
            return res

        def line(format, idlen=20):
            # One table row: format id (padded to idlen+1), extension,
            # resolution and the note column.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                format['ext'],
                self.format_resolution(format),
                format_note(format),
            ))

        # A plain (non-multi-format) info_dict is treated as a one-entry list.
        formats = info_dict.get('formats', [info_dict])
        # Widen the id column to fit the longest format id (or the header).
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are assumed sorted worst-to-best; label the extremes.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1047
1048 def urlopen(self, req):
1049 """ Start an HTTP download """
1050 return self._opener.open(req)
1051
1052 def print_debug_header(self):
1053 if not self.params.get('verbose'):
1054 return
1055 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1056 try:
1057 sp = subprocess.Popen(
1058 ['git', 'rev-parse', '--short', 'HEAD'],
1059 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1060 cwd=os.path.dirname(os.path.abspath(__file__)))
1061 out, err = sp.communicate()
1062 out = out.decode().strip()
1063 if re.match('[0-9a-f]+', out):
1064 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1065 except:
1066 try:
1067 sys.exc_clear()
1068 except:
1069 pass
1070 write_string(u'[debug] Python version %s - %s' %
1071 (platform.python_version(), platform_name()) + u'\n')
1072
1073 proxy_map = {}
1074 for handler in self._opener.handlers:
1075 if hasattr(handler, 'proxies'):
1076 proxy_map.update(handler.proxies)
1077 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1078
    def _setup_opener(self):
        """Build and install the urllib opener used for all HTTP requests.

        Reads 'socket_timeout', 'cookiefile', 'proxy' and
        'nocheckcertificate' from self.params, stores the opener in
        self._opener and the cookie jar in self.cookiejar, then installs
        the opener and socket timeout process-wide.
        """
        timeout_val = self.params.get('socket_timeout')
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when readable; a missing file is fine (it will be
            # created on save).
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicitly empty --proxy disables all proxies,
                # including those from the environment.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)