]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Make prefer_free_formats sorting more robust
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import collections
7 import errno
8 import io
9 import json
10 import os
11 import platform
12 import re
13 import shutil
14 import subprocess
15 import socket
16 import sys
17 import time
18 import traceback
19
20 if os.name == 'nt':
21 import ctypes
22
23 from .utils import (
24 compat_cookiejar,
25 compat_http_client,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 formatSeconds,
38 get_term_width,
39 locked_file,
40 make_HTTPS_handler,
41 MaxDownloadsReached,
42 PostProcessingError,
43 platform_name,
44 preferredencoding,
45 SameFileError,
46 sanitize_filename,
47 subtitles_filename,
48 takewhile_inclusive,
49 UnavailableVideoError,
50 url_basename,
51 write_json_file,
52 write_string,
53 YoutubeDLHandler,
54 )
55 from .extractor import get_info_extractor, gen_extractors
56 from .FileDownloader import FileDownloader
57 from .version import __version__
58
59
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for access a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Class-level defaults; every one of these is shadowed by an
    # instance attribute set in __init__.
    params = None             # dictionary of downloader options
    _ies = []                 # registered InfoExtractors, in probe order
    _pps = []                 # registered PostProcessors
    _download_retcode = None  # exit code accumulated across downloads
    _num_downloads = None     # ordinal of the current download
    _screen_file = None       # stream for screen output (stdout or stderr)
164
165 def __init__(self, params=None):
166 """Create a FileDownloader object with the given options."""
167 self._ies = []
168 self._ies_instances = {}
169 self._pps = []
170 self._progress_hooks = []
171 self._download_retcode = 0
172 self._num_downloads = 0
173 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
174 self._err_file = sys.stderr
175 self.params = {} if params is None else params
176
177 if params.get('bidi_workaround', False):
178 try:
179 import pty
180 master, slave = pty.openpty()
181 width = get_term_width()
182 if width is None:
183 width_args = []
184 else:
185 width_args = ['-w', str(width)]
186 self._fribidi = subprocess.Popen(
187 ['fribidi', '-c', 'UTF-8'] + width_args,
188 stdin=subprocess.PIPE,
189 stdout=slave,
190 stderr=self._err_file)
191 self._fribidi_channel = os.fdopen(master, 'rb')
192 except OSError as ose:
193 if ose.errno == 2:
194 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
195 else:
196 raise
197
198 if (sys.version_info >= (3,) and sys.platform != 'win32' and
199 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
200 and not params['restrictfilenames']):
201 # On Python 3, the Unicode filesystem API will throw errors (#1474)
202 self.report_warning(
203 u'Assuming --restrict-filenames since file system encoding '
204 u'cannot encode all charactes. '
205 u'Set the LC_ALL environment variable to fix this.')
206 self.params['restrictfilenames'] = True
207
208 self.fd = FileDownloader(self, self.params)
209
210 if '%(stitle)s' in self.params.get('outtmpl', ''):
211 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
212
213 self._setup_opener()
214
215 def add_info_extractor(self, ie):
216 """Add an InfoExtractor object to the end of the list."""
217 self._ies.append(ie)
218 self._ies_instances[ie.ie_key()] = ie
219 ie.set_downloader(self)
220
221 def get_info_extractor(self, ie_key):
222 """
223 Get an instance of an IE with name ie_key, it will try to get one from
224 the _ies list, if there's no instance it will create a new one and add
225 it to the extractor list.
226 """
227 ie = self._ies_instances.get(ie_key)
228 if ie is None:
229 ie = get_info_extractor(ie_key)()
230 self.add_info_extractor(ie)
231 return ie
232
233 def add_default_info_extractors(self):
234 """
235 Add the InfoExtractors returned by gen_extractors to the end of the list
236 """
237 for ie in gen_extractors():
238 self.add_info_extractor(ie)
239
240 def add_post_processor(self, pp):
241 """Add a PostProcessor object to the end of the chain."""
242 self._pps.append(pp)
243 pp.set_downloader(self)
244
245 def _bidi_workaround(self, message):
246 if not hasattr(self, '_fribidi_channel'):
247 return message
248
249 assert type(message) == type(u'')
250 line_count = message.count(u'\n') + 1
251 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
252 self._fribidi.stdin.flush()
253 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
254 for _ in range(line_count))
255 return res[:-len(u'\n')]
256
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode.

        Thin wrapper over to_stdout() with quiet-checking enabled.
        """
        return self.to_stdout(message, skip_eol, check_quiet=True)
260
261 def to_stdout(self, message, skip_eol=False, check_quiet=False):
262 """Print message to stdout if not in quiet mode."""
263 if self.params.get('logger'):
264 self.params['logger'].debug(message)
265 elif not check_quiet or not self.params.get('quiet', False):
266 message = self._bidi_workaround(message)
267 terminator = [u'\n', u''][skip_eol]
268 output = message + terminator
269
270 write_string(output, self._screen_file)
271
272 def to_stderr(self, message):
273 """Print message to stderr."""
274 assert type(message) == type(u'')
275 if self.params.get('logger'):
276 self.params['logger'].error(message)
277 else:
278 message = self._bidi_workaround(message)
279 output = message + u'\n'
280 write_string(output, self._err_file)
281
    def to_console_title(self, message):
        """Set the terminal/console window title, if the user enabled
        the consoletitle option."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape sequence: set icon name and window title.
            write_string(u'\033]0;%s\007' % message, self._screen_file)
291
292 def save_console_title(self):
293 if not self.params.get('consoletitle', False):
294 return
295 if 'TERM' in os.environ:
296 # Save the title on stack
297 write_string(u'\033[22;0t', self._screen_file)
298
299 def restore_console_title(self):
300 if not self.params.get('consoletitle', False):
301 return
302 if 'TERM' in os.environ:
303 # Restore the title from stack
304 write_string(u'\033[23;0t', self._screen_file)
305
    def __enter__(self):
        # Context-manager entry: stash the terminal title so __exit__
        # can restore it.
        self.save_console_title()
        return self
309
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and flush the
        # in-memory cookie jar back to disk if one was configured.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
315
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        # The active exception wraps an inner exc_info
                        # (e.g. ExtractorError); show the inner traceback too.
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info (when present) so the
            # raised DownloadError carries the original cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
345
346 def report_warning(self, message):
347 '''
348 Print the message to stderr, it will be prefixed with 'WARNING:'
349 If stderr is a tty file the 'WARNING:' will be colored
350 '''
351 if self._err_file.isatty() and os.name != 'nt':
352 _msg_header = u'\033[0;33mWARNING:\033[0m'
353 else:
354 _msg_header = u'WARNING:'
355 warning_message = u'%s %s' % (_msg_header, message)
356 self.to_stderr(warning_message)
357
358 def report_error(self, message, tb=None):
359 '''
360 Do the same as trouble, but prefixes the message with 'ERROR:', colored
361 in red if stderr is a tty file.
362 '''
363 if self._err_file.isatty() and os.name != 'nt':
364 _msg_header = u'\033[0;31mERROR:\033[0m'
365 else:
366 _msg_header = u'ERROR:'
367 error_message = u'%s %s' % (_msg_header, message)
368 self.trouble(error_message, tb)
369
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # file_name contains characters the output encoding cannot
            # represent; fall back to a generic message.
            self.to_screen(u'[download] The file has already been downloaded')
376
377 def increment_downloads(self):
378 """Increment the ordinal that assigns a number to each file."""
379 self._num_downloads += 1
380
381 def prepare_filename(self, info_dict):
382 """Generate the output filename."""
383 try:
384 template_dict = dict(info_dict)
385
386 template_dict['epoch'] = int(time.time())
387 autonumber_size = self.params.get('autonumber_size')
388 if autonumber_size is None:
389 autonumber_size = 5
390 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
391 template_dict['autonumber'] = autonumber_templ % self._num_downloads
392 if template_dict.get('playlist_index') is not None:
393 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
394
395 sanitize = lambda k, v: sanitize_filename(
396 compat_str(v),
397 restricted=self.params.get('restrictfilenames'),
398 is_id=(k == u'id'))
399 template_dict = dict((k, sanitize(k, v))
400 for k, v in template_dict.items()
401 if v is not None)
402 template_dict = collections.defaultdict(lambda: u'NA', template_dict)
403
404 tmpl = os.path.expanduser(self.params['outtmpl'])
405 filename = tmpl % template_dict
406 return filename
407 except ValueError as err:
408 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
409 return None
410
411 def _match_entry(self, info_dict):
412 """ Returns None iff the file should be downloaded """
413
414 video_title = info_dict.get('title', info_dict.get('id', u'video'))
415 if 'title' in info_dict:
416 # This can happen when we're just evaluating the playlist
417 title = info_dict['title']
418 matchtitle = self.params.get('matchtitle', False)
419 if matchtitle:
420 if not re.search(matchtitle, title, re.IGNORECASE):
421 return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
422 rejecttitle = self.params.get('rejecttitle', False)
423 if rejecttitle:
424 if re.search(rejecttitle, title, re.IGNORECASE):
425 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
426 date = info_dict.get('upload_date', None)
427 if date is not None:
428 dateRange = self.params.get('daterange', DateRange())
429 if date not in dateRange:
430 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
431 view_count = info_dict.get('view_count', None)
432 if view_count is not None:
433 min_views = self.params.get('min_views')
434 if min_views is not None and view_count < min_views:
435 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
436 max_views = self.params.get('max_views')
437 if max_views is not None and view_count > max_views:
438 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
439 age_limit = self.params.get('age_limit')
440 if age_limit is not None:
441 if age_limit < info_dict.get('age_limit', 0):
442 return u'Skipping "' + title + '" because it is age restricted'
443 if self.in_download_archive(info_dict):
444 return u'%s has already been recorded in archive' % video_title
445 return None
446
447 @staticmethod
448 def add_extra_info(info_dict, extra_info):
449 '''Set the keys from extra_info in info dict if they are missing'''
450 for key, value in extra_info.items():
451 info_dict.setdefault(key, value)
452
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if ie_key:
            # Caller pinned a specific extractor: skip the suitability scan.
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                # Stamp extractor/webpage metadata without clobbering
                # anything the IE already set.
                self.add_extra_info(ie_result,
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                    })
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no registered extractor accepted the URL at all.
            self.report_error(u'no suitable InfoExtractor: %s' % url)
506
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge the embedded page's fields into a copy of
                # ie_result: for each known key, drop the stale value and
                # take the embedded one when present.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            # Recurse: the merged result may itself be a video, url or
            # playlist.
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

            self.to_screen(
                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Apply title/date/view-count/archive filters per entry.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                # Stamp extractor/webpage metadata onto each legacy entry.
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
613
614 def select_format(self, format_spec, available_formats):
615 if format_spec == 'best' or format_spec is None:
616 return available_formats[-1]
617 elif format_spec == 'worst':
618 return available_formats[0]
619 else:
620 extensions = [u'mp4', u'flv', u'webm', u'3gp']
621 if format_spec in extensions:
622 filter_f = lambda f: f['ext'] == format_spec
623 else:
624 filter_f = lambda f: f['format_id'] == format_spec
625 matches = list(filter(filter_f, available_formats))
626 if matches:
627 return matches[-1]
628 return None
629
    def process_video_result(self, info_dict, download=True):
        """Normalize a single video result, select the format(s) to
        fetch and, if download is set, hand each one to process_info()."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                # Fall back to the position in the list as an id.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Keep everything up to and including the limit format.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                try:
                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                except ValueError:
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
                return (f.get('height') if f.get('height') is not None else -1,
                        f.get('width') if f.get('width') is not None else -1,
                        ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
717
718 def process_info(self, info_dict):
719 """Process a single resolved IE result."""
720
721 assert info_dict.get('_type', 'video') == 'video'
722 #We increment the download the download count here to match the previous behaviour.
723 self.increment_downloads()
724
725 info_dict['fulltitle'] = info_dict['title']
726 if len(info_dict['title']) > 200:
727 info_dict['title'] = info_dict['title'][:197] + u'...'
728
729 # Keep for backwards compatibility
730 info_dict['stitle'] = info_dict['title']
731
732 if not 'format' in info_dict:
733 info_dict['format'] = info_dict['ext']
734
735 reason = self._match_entry(info_dict)
736 if reason is not None:
737 self.to_screen(u'[download] ' + reason)
738 return
739
740 max_downloads = self.params.get('max_downloads')
741 if max_downloads is not None:
742 if self._num_downloads > int(max_downloads):
743 raise MaxDownloadsReached()
744
745 filename = self.prepare_filename(info_dict)
746
747 # Forced printings
748 if self.params.get('forcetitle', False):
749 self.to_stdout(info_dict['fulltitle'])
750 if self.params.get('forceid', False):
751 self.to_stdout(info_dict['id'])
752 if self.params.get('forceurl', False):
753 # For RTMP URLs, also include the playpath
754 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
755 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
756 self.to_stdout(info_dict['thumbnail'])
757 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
758 self.to_stdout(info_dict['description'])
759 if self.params.get('forcefilename', False) and filename is not None:
760 self.to_stdout(filename)
761 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
762 self.to_stdout(formatSeconds(info_dict['duration']))
763 if self.params.get('forceformat', False):
764 self.to_stdout(info_dict['format'])
765 if self.params.get('forcejson', False):
766 info_dict['_filename'] = filename
767 self.to_stdout(json.dumps(info_dict))
768
769 # Do nothing else if in simulate mode
770 if self.params.get('simulate', False):
771 return
772
773 if filename is None:
774 return
775
776 try:
777 dn = os.path.dirname(encodeFilename(filename))
778 if dn != '' and not os.path.exists(dn):
779 os.makedirs(dn)
780 except (OSError, IOError) as err:
781 self.report_error(u'unable to create directory ' + compat_str(err))
782 return
783
784 if self.params.get('writedescription', False):
785 descfn = filename + u'.description'
786 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
787 self.to_screen(u'[info] Video description is already present')
788 else:
789 try:
790 self.to_screen(u'[info] Writing video description to: ' + descfn)
791 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
792 descfile.write(info_dict['description'])
793 except (KeyError, TypeError):
794 self.report_warning(u'There\'s no description to write.')
795 except (OSError, IOError):
796 self.report_error(u'Cannot write description file ' + descfn)
797 return
798
799 if self.params.get('writeannotations', False):
800 annofn = filename + u'.annotations.xml'
801 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
802 self.to_screen(u'[info] Video annotations are already present')
803 else:
804 try:
805 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
806 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
807 annofile.write(info_dict['annotations'])
808 except (KeyError, TypeError):
809 self.report_warning(u'There are no annotations to write.')
810 except (OSError, IOError):
811 self.report_error(u'Cannot write annotations file: ' + annofn)
812 return
813
814 subtitles_are_requested = any([self.params.get('writesubtitles', False),
815 self.params.get('writeautomaticsub')])
816
817 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
818 # subtitles download errors are already managed as troubles in relevant IE
819 # that way it will silently go on when used with unsupporting IE
820 subtitles = info_dict['subtitles']
821 sub_format = self.params.get('subtitlesformat', 'srt')
822 for sub_lang in subtitles.keys():
823 sub = subtitles[sub_lang]
824 if sub is None:
825 continue
826 try:
827 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
828 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
829 self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
830 else:
831 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
832 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
833 subfile.write(sub)
834 except (OSError, IOError):
835 self.report_error(u'Cannot write subtitles file ' + descfn)
836 return
837
838 if self.params.get('writeinfojson', False):
839 infofn = os.path.splitext(filename)[0] + u'.info.json'
840 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
841 self.to_screen(u'[info] Video description metadata is already present')
842 else:
843 self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
844 try:
845 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
846 write_json_file(json_info_dict, encodeFilename(infofn))
847 except (OSError, IOError):
848 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
849 return
850
851 if self.params.get('writethumbnail', False):
852 if info_dict.get('thumbnail') is not None:
853 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
854 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
855 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
856 self.to_screen(u'[%s] %s: Thumbnail is already present' %
857 (info_dict['extractor'], info_dict['id']))
858 else:
859 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
860 (info_dict['extractor'], info_dict['id']))
861 try:
862 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
863 with open(thumb_filename, 'wb') as thumbf:
864 shutil.copyfileobj(uf, thumbf)
865 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
866 (info_dict['extractor'], info_dict['id'], thumb_filename))
867 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
868 self.report_warning(u'Unable to download thumbnail "%s": %s' %
869 (info_dict['thumbnail'], compat_str(err)))
870
871 if not self.params.get('skip_download', False):
872 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
873 success = True
874 else:
875 try:
876 success = self.fd._do_download(filename, info_dict)
877 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
878 self.report_error(u'unable to download video data: %s' % str(err))
879 return
880 except (OSError, IOError) as err:
881 raise UnavailableVideoError(err)
882 except (ContentTooShortError, ) as err:
883 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
884 return
885
886 if success:
887 try:
888 self.post_process(filename, info_dict)
889 except (PostProcessingError) as err:
890 self.report_error(u'postprocessing: %s' % str(err))
891 return
892
893 self.record_download_archive(info_dict)
894
895 def download(self, url_list):
896 """Download a given list of URLs."""
897 if (len(url_list) > 1 and
898 '%' not in self.params['outtmpl']
899 and self.params.get('max_downloads') != 1):
900 raise SameFileError(self.params['outtmpl'])
901
902 for url in url_list:
903 try:
904 #It also downloads the videos
905 self.extract_info(url)
906 except UnavailableVideoError:
907 self.report_error(u'unable to download video')
908 except MaxDownloadsReached:
909 self.to_screen(u'[info] Maximum number of downloaded files reached.')
910 raise
911
912 return self._download_retcode
913
914 def download_with_info_file(self, info_filename):
915 with io.open(info_filename, 'r', encoding='utf-8') as f:
916 info = json.load(f)
917 try:
918 self.process_ie_result(info, download=True)
919 except DownloadError:
920 webpage_url = info.get('webpage_url')
921 if webpage_url is not None:
922 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
923 return self.download([webpage_url])
924 else:
925 raise
926 return self._download_retcode
927
928 def post_process(self, filename, ie_info):
929 """Run all the postprocessors on the given file."""
930 info = dict(ie_info)
931 info['filepath'] = filename
932 keep_video = None
933 for pp in self._pps:
934 try:
935 keep_video_wish, new_info = pp.run(info)
936 if keep_video_wish is not None:
937 if keep_video_wish:
938 keep_video = keep_video_wish
939 elif keep_video is None:
940 # No clear decision yet, let IE decide
941 keep_video = keep_video_wish
942 except PostProcessingError as e:
943 self.report_error(e.msg)
944 if keep_video is False and not self.params.get('keepvideo', False):
945 try:
946 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
947 os.remove(encodeFilename(filename))
948 except (IOError, OSError):
949 self.report_warning(u'Unable to remove downloaded video file')
950
951 def _make_archive_id(self, info_dict):
952 # Future-proof against any change in case
953 # and backwards compatibility with prior versions
954 extractor = info_dict.get('extractor_key')
955 if extractor is None:
956 if 'id' in info_dict:
957 extractor = info_dict.get('ie_key') # key in a playlist
958 if extractor is None:
959 return None # Incomplete video information
960 return extractor.lower() + u' ' + info_dict['id']
961
962 def in_download_archive(self, info_dict):
963 fn = self.params.get('download_archive')
964 if fn is None:
965 return False
966
967 vid_id = self._make_archive_id(info_dict)
968 if vid_id is None:
969 return False # Incomplete video information
970
971 try:
972 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
973 for line in archive_file:
974 if line.strip() == vid_id:
975 return True
976 except IOError as ioe:
977 if ioe.errno != errno.ENOENT:
978 raise
979 return False
980
981 def record_download_archive(self, info_dict):
982 fn = self.params.get('download_archive')
983 if fn is None:
984 return
985 vid_id = self._make_archive_id(info_dict)
986 assert vid_id
987 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
988 archive_file.write(vid_id + u'\n')
989
990 @staticmethod
991 def format_resolution(format, default='unknown'):
992 if format.get('vcodec') == 'none':
993 return 'audio only'
994 if format.get('_resolution') is not None:
995 return format['_resolution']
996 if format.get('height') is not None:
997 if format.get('width') is not None:
998 res = u'%sx%s' % (format['width'], format['height'])
999 else:
1000 res = u'%sp' % format['height']
1001 else:
1002 res = default
1003 return res
1004
    def list_formats(self, info_dict):
        """Print a table of every available format for the given video."""
        def format_note(fdict):
            # Build the free-form "note" column: codecs, bitrates, filesize.
            res = u''
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
                # Video bitrate known but codec unknown.
                res += u'video'
            if fdict.get('vbr') is not None:
                res += u'@%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if res:
                    res += u', '
                res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
                if res:
                    res += u', '
                res += 'audio'
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                if res:
                    res += u', '
                res += format_bytes(fdict['filesize'])
            return res

        def line(format, idlen=20):
            # One table row; idlen pads the format-id column to a common width.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                format['ext'],
                self.format_resolution(format),
                format_note(format),
            ))

        # A video without a 'formats' list is treated as its own single format.
        formats = info_dict.get('formats', [info_dict])
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are listed worst-first/best-last; annotate the extremes.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1054
1055 def urlopen(self, req):
1056 """ Start an HTTP download """
1057 return self._opener.open(req)
1058
1059 def print_debug_header(self):
1060 if not self.params.get('verbose'):
1061 return
1062 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1063 try:
1064 sp = subprocess.Popen(
1065 ['git', 'rev-parse', '--short', 'HEAD'],
1066 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1067 cwd=os.path.dirname(os.path.abspath(__file__)))
1068 out, err = sp.communicate()
1069 out = out.decode().strip()
1070 if re.match('[0-9a-f]+', out):
1071 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1072 except:
1073 try:
1074 sys.exc_clear()
1075 except:
1076 pass
1077 write_string(u'[debug] Python version %s - %s' %
1078 (platform.python_version(), platform_name()) + u'\n')
1079
1080 proxy_map = {}
1081 for handler in self._opener.handlers:
1082 if hasattr(handler, 'proxies'):
1083 proxy_map.update(handler.proxies)
1084 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1085
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS) used for all requests."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 10 minutes when no --socket-timeout is given.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory jar only; cookies are discarded when the process exits.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load the file if readable; a fresh jar is fine otherwise.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # --proxy "" explicitly disables all proxies.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)