# jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
# Document width, height, and resolution (#1445)
# [yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import errno
8 import io
9 import json
10 import os
11 import platform
12 import re
13 import shutil
14 import subprocess
15 import socket
16 import sys
17 import time
18 import traceback
19
20 if os.name == 'nt':
21 import ctypes
22
23 from .utils import (
24 compat_cookiejar,
25 compat_http_client,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 formatSeconds,
38 get_term_width,
39 locked_file,
40 make_HTTPS_handler,
41 MaxDownloadsReached,
42 PagedList,
43 PostProcessingError,
44 platform_name,
45 preferredencoding,
46 SameFileError,
47 sanitize_filename,
48 subtitles_filename,
49 takewhile_inclusive,
50 UnavailableVideoError,
51 url_basename,
52 write_json_file,
53 write_string,
54 YoutubeDLHandler,
55 prepend_extension,
56 )
57 from .extractor import get_info_extractor, gen_extractors
58 from .downloader import get_suitable_downloader
59 from .postprocessor import FFmpegMergerPP
60 from .version import __version__
61
62
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    """

    # Class-level defaults only; real values are assigned per instance
    # in __init__ (these remain as documentation of the attribute set).
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
175
176 def __init__(self, params=None):
177 """Create a FileDownloader object with the given options."""
178 if params is None:
179 params = {}
180 self._ies = []
181 self._ies_instances = {}
182 self._pps = []
183 self._progress_hooks = []
184 self._download_retcode = 0
185 self._num_downloads = 0
186 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
187 self._err_file = sys.stderr
188 self.params = params
189
190 if params.get('bidi_workaround', False):
191 try:
192 import pty
193 master, slave = pty.openpty()
194 width = get_term_width()
195 if width is None:
196 width_args = []
197 else:
198 width_args = ['-w', str(width)]
199 sp_kwargs = dict(
200 stdin=subprocess.PIPE,
201 stdout=slave,
202 stderr=self._err_file)
203 try:
204 self._output_process = subprocess.Popen(
205 ['bidiv'] + width_args, **sp_kwargs
206 )
207 except OSError:
208 self._output_process = subprocess.Popen(
209 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
210 self._output_channel = os.fdopen(master, 'rb')
211 except OSError as ose:
212 if ose.errno == 2:
213 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
214 else:
215 raise
216
217 if (sys.version_info >= (3,) and sys.platform != 'win32' and
218 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
219 and not params['restrictfilenames']):
220 # On Python 3, the Unicode filesystem API will throw errors (#1474)
221 self.report_warning(
222 'Assuming --restrict-filenames since file system encoding '
223 'cannot encode all charactes. '
224 'Set the LC_ALL environment variable to fix this.')
225 self.params['restrictfilenames'] = True
226
227 if '%(stitle)s' in self.params.get('outtmpl', ''):
228 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
229
230 self._setup_opener()
231
232 def add_info_extractor(self, ie):
233 """Add an InfoExtractor object to the end of the list."""
234 self._ies.append(ie)
235 self._ies_instances[ie.ie_key()] = ie
236 ie.set_downloader(self)
237
238 def get_info_extractor(self, ie_key):
239 """
240 Get an instance of an IE with name ie_key, it will try to get one from
241 the _ies list, if there's no instance it will create a new one and add
242 it to the extractor list.
243 """
244 ie = self._ies_instances.get(ie_key)
245 if ie is None:
246 ie = get_info_extractor(ie_key)()
247 self.add_info_extractor(ie)
248 return ie
249
250 def add_default_info_extractors(self):
251 """
252 Add the InfoExtractors returned by gen_extractors to the end of the list
253 """
254 for ie in gen_extractors():
255 self.add_info_extractor(ie)
256
257 def add_post_processor(self, pp):
258 """Add a PostProcessor object to the end of the chain."""
259 self._pps.append(pp)
260 pp.set_downloader(self)
261
262 def add_progress_hook(self, ph):
263 """Add the progress hook (currently only for the file downloader)"""
264 self._progress_hooks.append(ph)
265
266 def _bidi_workaround(self, message):
267 if not hasattr(self, '_output_channel'):
268 return message
269
270 assert hasattr(self, '_output_process')
271 assert type(message) == type('')
272 line_count = message.count('\n') + 1
273 self._output_process.stdin.write((message + '\n').encode('utf-8'))
274 self._output_process.stdin.flush()
275 res = ''.join(self._output_channel.readline().decode('utf-8')
276 for _ in range(line_count))
277 return res[:-len('\n')]
278
279 def to_screen(self, message, skip_eol=False):
280 """Print message to stdout if not in quiet mode."""
281 return self.to_stdout(message, skip_eol, check_quiet=True)
282
283 def to_stdout(self, message, skip_eol=False, check_quiet=False):
284 """Print message to stdout if not in quiet mode."""
285 if self.params.get('logger'):
286 self.params['logger'].debug(message)
287 elif not check_quiet or not self.params.get('quiet', False):
288 message = self._bidi_workaround(message)
289 terminator = ['\n', ''][skip_eol]
290 output = message + terminator
291
292 write_string(output, self._screen_file)
293
294 def to_stderr(self, message):
295 """Print message to stderr."""
296 assert type(message) == type('')
297 if self.params.get('logger'):
298 self.params['logger'].error(message)
299 else:
300 message = self._bidi_workaround(message)
301 output = message + '\n'
302 write_string(output, self._err_file)
303
304 def to_console_title(self, message):
305 if not self.params.get('consoletitle', False):
306 return
307 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
308 # c_wchar_p() might not be necessary if `message` is
309 # already of type unicode()
310 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
311 elif 'TERM' in os.environ:
312 write_string('\033]0;%s\007' % message, self._screen_file)
313
314 def save_console_title(self):
315 if not self.params.get('consoletitle', False):
316 return
317 if 'TERM' in os.environ:
318 # Save the title on stack
319 write_string('\033[22;0t', self._screen_file)
320
321 def restore_console_title(self):
322 if not self.params.get('consoletitle', False):
323 return
324 if 'TERM' in os.environ:
325 # Restore the title from stack
326 write_string('\033[23;0t', self._screen_file)
327
328 def __enter__(self):
329 self.save_console_title()
330 return self
331
332 def __exit__(self, *args):
333 self.restore_console_title()
334
335 if self.params.get('cookiefile') is not None:
336 self.cookiejar.save()
337
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # If the active exception carries its own exc_info
                    # (e.g. a wrapped extractor exception), include that
                    # original traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # No exception is being handled: show the current
                    # call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info when present so the
            # DownloadError points at the original cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code only
        self._download_retcode = 1
367
368 def report_warning(self, message):
369 '''
370 Print the message to stderr, it will be prefixed with 'WARNING:'
371 If stderr is a tty file the 'WARNING:' will be colored
372 '''
373 if self._err_file.isatty() and os.name != 'nt':
374 _msg_header = '\033[0;33mWARNING:\033[0m'
375 else:
376 _msg_header = 'WARNING:'
377 warning_message = '%s %s' % (_msg_header, message)
378 self.to_stderr(warning_message)
379
380 def report_error(self, message, tb=None):
381 '''
382 Do the same as trouble, but prefixes the message with 'ERROR:', colored
383 in red if stderr is a tty file.
384 '''
385 if self._err_file.isatty() and os.name != 'nt':
386 _msg_header = '\033[0;31mERROR:\033[0m'
387 else:
388 _msg_header = 'ERROR:'
389 error_message = '%s %s' % (_msg_header, message)
390 self.trouble(error_message, tb)
391
392 def report_file_already_downloaded(self, file_name):
393 """Report file has already been fully downloaded."""
394 try:
395 self.to_screen('[download] %s has already been downloaded' % file_name)
396 except UnicodeEncodeError:
397 self.to_screen('[download] The file has already been downloaded')
398
399 def prepare_filename(self, info_dict):
400 """Generate the output filename."""
401 try:
402 template_dict = dict(info_dict)
403
404 template_dict['epoch'] = int(time.time())
405 autonumber_size = self.params.get('autonumber_size')
406 if autonumber_size is None:
407 autonumber_size = 5
408 autonumber_templ = '%0' + str(autonumber_size) + 'd'
409 template_dict['autonumber'] = autonumber_templ % self._num_downloads
410 if template_dict.get('playlist_index') is not None:
411 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
412 if template_dict.get('resolution') is None:
413 if template_dict.get('width') and template_dict.get('height'):
414 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
415 elif template_dict.get('height'):
416 res = '%sp' % template_dict['height']
417 elif template_dict.get('width'):
418 res = '?x%d' % template_dict['width']
419
420 sanitize = lambda k, v: sanitize_filename(
421 compat_str(v),
422 restricted=self.params.get('restrictfilenames'),
423 is_id=(k == 'id'))
424 template_dict = dict((k, sanitize(k, v))
425 for k, v in template_dict.items()
426 if v is not None)
427 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
428
429 tmpl = os.path.expanduser(self.params['outtmpl'])
430 filename = tmpl % template_dict
431 return filename
432 except ValueError as err:
433 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
434 return None
435
436 def _match_entry(self, info_dict):
437 """ Returns None iff the file should be downloaded """
438
439 video_title = info_dict.get('title', info_dict.get('id', 'video'))
440 if 'title' in info_dict:
441 # This can happen when we're just evaluating the playlist
442 title = info_dict['title']
443 matchtitle = self.params.get('matchtitle', False)
444 if matchtitle:
445 if not re.search(matchtitle, title, re.IGNORECASE):
446 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
447 rejecttitle = self.params.get('rejecttitle', False)
448 if rejecttitle:
449 if re.search(rejecttitle, title, re.IGNORECASE):
450 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
451 date = info_dict.get('upload_date', None)
452 if date is not None:
453 dateRange = self.params.get('daterange', DateRange())
454 if date not in dateRange:
455 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
456 view_count = info_dict.get('view_count', None)
457 if view_count is not None:
458 min_views = self.params.get('min_views')
459 if min_views is not None and view_count < min_views:
460 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
461 max_views = self.params.get('max_views')
462 if max_views is not None and view_count > max_views:
463 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
464 age_limit = self.params.get('age_limit')
465 if age_limit is not None:
466 if age_limit < info_dict.get('age_limit', 0):
467 return 'Skipping "' + title + '" because it is age restricted'
468 if self.in_download_archive(info_dict):
469 return '%s has already been recorded in archive' % video_title
470 return None
471
472 @staticmethod
473 def add_extra_info(info_dict, extra_info):
474 '''Set the keys from extra_info in info dict if they are missing'''
475 for key, value in extra_info.items():
476 info_dict.setdefault(key, value)
477
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key, when given, forces that single extractor instead of
        probing each registered one with .suitable().
        process=False returns the raw ie_result without resolving
        references through process_ie_result().
        NOTE(review): extra_info has a mutable default; it is not
        mutated in this method, but callers should not rely on that.
        '''

        if ie_key:
            # Caller pinned a specific extractor
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # Broken extractors are still tried, but with a warning
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                # Record provenance of the result before any processing
                self.add_extra_info(ie_result,
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                    })
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: signals the whole run should stop
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for-else: only reached when no extractor accepted the URL
            self.report_error('no suitable InfoExtractor: %s' % url)
533
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' (default), 'url',
        'url_transparent', 'playlist' or 'compat_list'; 'url' and
        'url_transparent' recurse through extract_info, playlists and
        compat lists recurse through this method per entry.
        """

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge: embedded values win for these keys; keys present
                # in neither are dropped from the copy.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            # A url_transparent pointing at another url_transparent would
            # loop forever; extract_info(process=False) must not return one
            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based on the command line, 0-based here
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            else:
                # Lazily paged playlists only fetch the requested slice
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                # Provenance + playlist position passed down to each entry
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                # Propagate provenance into each legacy-format entry
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
648
649 def select_format(self, format_spec, available_formats):
650 if format_spec == 'best' or format_spec is None:
651 return available_formats[-1]
652 elif format_spec == 'worst':
653 return available_formats[0]
654 elif format_spec == 'bestaudio':
655 audio_formats = [
656 f for f in available_formats
657 if f.get('vcodec') == 'none']
658 if audio_formats:
659 return audio_formats[-1]
660 elif format_spec == 'worstaudio':
661 audio_formats = [
662 f for f in available_formats
663 if f.get('vcodec') == 'none']
664 if audio_formats:
665 return audio_formats[0]
666 else:
667 extensions = ['mp4', 'flv', 'webm', '3gp']
668 if format_spec in extensions:
669 filter_f = lambda f: f['ext'] == format_spec
670 else:
671 filter_f = lambda f: f['format_id'] == format_spec
672 matches = list(filter(filter_f, available_formats))
673 if matches:
674 return matches[-1]
675 return None
676
    def process_video_result(self, info_dict, download=True):
        """Normalize the formats of a single video result, select the
        requested format(s) and, when download is true, hand each one to
        process_info().  Returns the (mutated) info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                # Fall back to the position in the list as the id
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        # format_limit keeps everything up to and including the limit id
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    # Both halves must exist for a merge request to apply
                    if all(formats_info):
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format gets its own merged info dict
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
777
778 def process_info(self, info_dict):
779 """Process a single resolved IE result."""
780
781 assert info_dict.get('_type', 'video') == 'video'
782
783 max_downloads = self.params.get('max_downloads')
784 if max_downloads is not None:
785 if self._num_downloads >= int(max_downloads):
786 raise MaxDownloadsReached()
787
788 info_dict['fulltitle'] = info_dict['title']
789 if len(info_dict['title']) > 200:
790 info_dict['title'] = info_dict['title'][:197] + '...'
791
792 # Keep for backwards compatibility
793 info_dict['stitle'] = info_dict['title']
794
795 if not 'format' in info_dict:
796 info_dict['format'] = info_dict['ext']
797
798 reason = self._match_entry(info_dict)
799 if reason is not None:
800 self.to_screen('[download] ' + reason)
801 return
802
803 self._num_downloads += 1
804
805 filename = self.prepare_filename(info_dict)
806
807 # Forced printings
808 if self.params.get('forcetitle', False):
809 self.to_stdout(info_dict['fulltitle'])
810 if self.params.get('forceid', False):
811 self.to_stdout(info_dict['id'])
812 if self.params.get('forceurl', False):
813 # For RTMP URLs, also include the playpath
814 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
815 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
816 self.to_stdout(info_dict['thumbnail'])
817 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
818 self.to_stdout(info_dict['description'])
819 if self.params.get('forcefilename', False) and filename is not None:
820 self.to_stdout(filename)
821 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
822 self.to_stdout(formatSeconds(info_dict['duration']))
823 if self.params.get('forceformat', False):
824 self.to_stdout(info_dict['format'])
825 if self.params.get('forcejson', False):
826 info_dict['_filename'] = filename
827 self.to_stdout(json.dumps(info_dict))
828
829 # Do nothing else if in simulate mode
830 if self.params.get('simulate', False):
831 return
832
833 if filename is None:
834 return
835
836 try:
837 dn = os.path.dirname(encodeFilename(filename))
838 if dn != '' and not os.path.exists(dn):
839 os.makedirs(dn)
840 except (OSError, IOError) as err:
841 self.report_error('unable to create directory ' + compat_str(err))
842 return
843
844 if self.params.get('writedescription', False):
845 descfn = filename + '.description'
846 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
847 self.to_screen('[info] Video description is already present')
848 else:
849 try:
850 self.to_screen('[info] Writing video description to: ' + descfn)
851 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
852 descfile.write(info_dict['description'])
853 except (KeyError, TypeError):
854 self.report_warning('There\'s no description to write.')
855 except (OSError, IOError):
856 self.report_error('Cannot write description file ' + descfn)
857 return
858
859 if self.params.get('writeannotations', False):
860 annofn = filename + '.annotations.xml'
861 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
862 self.to_screen('[info] Video annotations are already present')
863 else:
864 try:
865 self.to_screen('[info] Writing video annotations to: ' + annofn)
866 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
867 annofile.write(info_dict['annotations'])
868 except (KeyError, TypeError):
869 self.report_warning('There are no annotations to write.')
870 except (OSError, IOError):
871 self.report_error('Cannot write annotations file: ' + annofn)
872 return
873
874 subtitles_are_requested = any([self.params.get('writesubtitles', False),
875 self.params.get('writeautomaticsub')])
876
877 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
878 # subtitles download errors are already managed as troubles in relevant IE
879 # that way it will silently go on when used with unsupporting IE
880 subtitles = info_dict['subtitles']
881 sub_format = self.params.get('subtitlesformat', 'srt')
882 for sub_lang in subtitles.keys():
883 sub = subtitles[sub_lang]
884 if sub is None:
885 continue
886 try:
887 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
888 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
889 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
890 else:
891 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
892 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
893 subfile.write(sub)
894 except (OSError, IOError):
895 self.report_error('Cannot write subtitles file ' + descfn)
896 return
897
898 if self.params.get('writeinfojson', False):
899 infofn = os.path.splitext(filename)[0] + '.info.json'
900 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
901 self.to_screen('[info] Video description metadata is already present')
902 else:
903 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
904 try:
905 write_json_file(info_dict, encodeFilename(infofn))
906 except (OSError, IOError):
907 self.report_error('Cannot write metadata to JSON file ' + infofn)
908 return
909
910 if self.params.get('writethumbnail', False):
911 if info_dict.get('thumbnail') is not None:
912 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
913 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
914 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
915 self.to_screen('[%s] %s: Thumbnail is already present' %
916 (info_dict['extractor'], info_dict['id']))
917 else:
918 self.to_screen('[%s] %s: Downloading thumbnail ...' %
919 (info_dict['extractor'], info_dict['id']))
920 try:
921 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
922 with open(thumb_filename, 'wb') as thumbf:
923 shutil.copyfileobj(uf, thumbf)
924 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
925 (info_dict['extractor'], info_dict['id'], thumb_filename))
926 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
927 self.report_warning('Unable to download thumbnail "%s": %s' %
928 (info_dict['thumbnail'], compat_str(err)))
929
930 if not self.params.get('skip_download', False):
931 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
932 success = True
933 else:
934 try:
935 def dl(name, info):
936 fd = get_suitable_downloader(info)(self, self.params)
937 for ph in self._progress_hooks:
938 fd.add_progress_hook(ph)
939 return fd.download(name, info)
940 if info_dict.get('requested_formats') is not None:
941 downloaded = []
942 success = True
943 merger = FFmpegMergerPP(self)
944 if not merger._get_executable():
945 postprocessors = []
946 self.report_warning('You have requested multiple '
947 'formats but ffmpeg or avconv are not installed.'
948 ' The formats won\'t be merged')
949 else:
950 postprocessors = [merger]
951 for f in info_dict['requested_formats']:
952 new_info = dict(info_dict)
953 new_info.update(f)
954 fname = self.prepare_filename(new_info)
955 fname = prepend_extension(fname, 'f%s' % f['format_id'])
956 downloaded.append(fname)
957 partial_success = dl(fname, new_info)
958 success = success and partial_success
959 info_dict['__postprocessors'] = postprocessors
960 info_dict['__files_to_merge'] = downloaded
961 else:
962 # Just a single file
963 success = dl(filename, info_dict)
964 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
965 self.report_error('unable to download video data: %s' % str(err))
966 return
967 except (OSError, IOError) as err:
968 raise UnavailableVideoError(err)
969 except (ContentTooShortError, ) as err:
970 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
971 return
972
973 if success:
974 try:
975 self.post_process(filename, info_dict)
976 except (PostProcessingError) as err:
977 self.report_error('postprocessing: %s' % str(err))
978 return
979
980 self.record_download_archive(info_dict)
981
982 def download(self, url_list):
983 """Download a given list of URLs."""
984 if (len(url_list) > 1 and
985 '%' not in self.params['outtmpl']
986 and self.params.get('max_downloads') != 1):
987 raise SameFileError(self.params['outtmpl'])
988
989 for url in url_list:
990 try:
991 #It also downloads the videos
992 self.extract_info(url)
993 except UnavailableVideoError:
994 self.report_error('unable to download video')
995 except MaxDownloadsReached:
996 self.to_screen('[info] Maximum number of downloaded files reached.')
997 raise
998
999 return self._download_retcode
1000
1001 def download_with_info_file(self, info_filename):
1002 with io.open(info_filename, 'r', encoding='utf-8') as f:
1003 info = json.load(f)
1004 try:
1005 self.process_ie_result(info, download=True)
1006 except DownloadError:
1007 webpage_url = info.get('webpage_url')
1008 if webpage_url is not None:
1009 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1010 return self.download([webpage_url])
1011 else:
1012 raise
1013 return self._download_retcode
1014
1015 def post_process(self, filename, ie_info):
1016 """Run all the postprocessors on the given file."""
1017 info = dict(ie_info)
1018 info['filepath'] = filename
1019 keep_video = None
1020 pps_chain = []
1021 if ie_info.get('__postprocessors') is not None:
1022 pps_chain.extend(ie_info['__postprocessors'])
1023 pps_chain.extend(self._pps)
1024 for pp in pps_chain:
1025 try:
1026 keep_video_wish, new_info = pp.run(info)
1027 if keep_video_wish is not None:
1028 if keep_video_wish:
1029 keep_video = keep_video_wish
1030 elif keep_video is None:
1031 # No clear decision yet, let IE decide
1032 keep_video = keep_video_wish
1033 except PostProcessingError as e:
1034 self.report_error(e.msg)
1035 if keep_video is False and not self.params.get('keepvideo', False):
1036 try:
1037 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1038 os.remove(encodeFilename(filename))
1039 except (IOError, OSError):
1040 self.report_warning('Unable to remove downloaded video file')
1041
1042 def _make_archive_id(self, info_dict):
1043 # Future-proof against any change in case
1044 # and backwards compatibility with prior versions
1045 extractor = info_dict.get('extractor_key')
1046 if extractor is None:
1047 if 'id' in info_dict:
1048 extractor = info_dict.get('ie_key') # key in a playlist
1049 if extractor is None:
1050 return None # Incomplete video information
1051 return extractor.lower() + ' ' + info_dict['id']
1052
1053 def in_download_archive(self, info_dict):
1054 fn = self.params.get('download_archive')
1055 if fn is None:
1056 return False
1057
1058 vid_id = self._make_archive_id(info_dict)
1059 if vid_id is None:
1060 return False # Incomplete video information
1061
1062 try:
1063 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1064 for line in archive_file:
1065 if line.strip() == vid_id:
1066 return True
1067 except IOError as ioe:
1068 if ioe.errno != errno.ENOENT:
1069 raise
1070 return False
1071
1072 def record_download_archive(self, info_dict):
1073 fn = self.params.get('download_archive')
1074 if fn is None:
1075 return
1076 vid_id = self._make_archive_id(info_dict)
1077 assert vid_id
1078 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1079 archive_file.write(vid_id + '\n')
1080
1081 @staticmethod
1082 def format_resolution(format, default='unknown'):
1083 if format.get('vcodec') == 'none':
1084 return 'audio only'
1085 if format.get('resolution') is not None:
1086 return format['resolution']
1087 if format.get('height') is not None:
1088 if format.get('width') is not None:
1089 res = '%sx%s' % (format['width'], format['height'])
1090 else:
1091 res = '%sp' % format['height']
1092 elif format.get('width') is not None:
1093 res = '?x%d' % format['width']
1094 else:
1095 res = default
1096 return res
1097
    def list_formats(self, info_dict):
        """Print a table of all available formats for this video to the screen."""
        def format_note(fdict):
            # Build a short, comma-separated description of one format:
            # container, codecs, bitrates, sample rate and file size.
            res = ''
            if fdict.get('ext') in ['f4f', 'f4m']:
                res += '(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + ' '
            if fdict.get('tbr') is not None:
                res += '%4dk ' % fdict['tbr']
            if fdict.get('container') is not None:
                if res:
                    res += ', '
                res += '%s container' % fdict['container']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                if res:
                    res += ', '
                res += fdict['vcodec']
                if fdict.get('vbr') is not None:
                    # The video bitrate is appended right after the codec name.
                    res += '@'
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
                # No video codec known but both bitrates are: label the video
                # bitrate explicitly so it isn't mistaken for the audio one.
                res += 'video@'
            if fdict.get('vbr') is not None:
                res += '%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if res:
                    res += ', '
                if fdict['acodec'] == 'none':
                    res += 'video only'
                else:
                    res += '%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
                if res:
                    res += ', '
                res += 'audio'
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
            if fdict.get('asr') is not None:
                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                if res:
                    res += ', '
                res += format_bytes(fdict['filesize'])
            return res

        def line(format, idlen=20):
            # One table row: format id, extension, resolution and note,
            # padded so the columns line up across rows.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                format['ext'],
                self.format_resolution(format),
                format_note(format),
            ))

        # A plain video dict (no 'formats' list) is shown as its own only row.
        formats = info_dict.get('formats', [info_dict])
        # Width of the id column: the longest format id, or the header text.
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # The formats list is expected worst-first: the first row is
            # labelled '(worst)' and the last '(best)'.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
1164
1165 def urlopen(self, req):
1166 """ Start an HTTP download """
1167 return self._opener.open(req)
1168
1169 def print_debug_header(self):
1170 if not self.params.get('verbose'):
1171 return
1172 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1173 try:
1174 sp = subprocess.Popen(
1175 ['git', 'rev-parse', '--short', 'HEAD'],
1176 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1177 cwd=os.path.dirname(os.path.abspath(__file__)))
1178 out, err = sp.communicate()
1179 out = out.decode().strip()
1180 if re.match('[0-9a-f]+', out):
1181 write_string('[debug] Git HEAD: ' + out + '\n')
1182 except:
1183 try:
1184 sys.exc_clear()
1185 except:
1186 pass
1187 write_string('[debug] Python version %s - %s' %
1188 (platform.python_version(), platform_name()) + '\n')
1189
1190 proxy_map = {}
1191 for handler in self._opener.handlers:
1192 if hasattr(handler, 'proxies'):
1193 proxy_map.update(handler.proxies)
1194 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1195
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS, custom handler)
        used for all HTTP requests and store it as self._opener.

        Side effects: also installs the opener globally via install_opener()
        and sets the global default socket timeout.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 600 s timeout when none is configured.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            # Persistent cookie file; load existing cookies if readable.
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            # An explicitly empty --proxy disables proxying entirely.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No explicit proxy: fall back to the environment settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)