]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Set 'NA' as the default value for missing fields in the output template (fixes #1931)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import collections
7 import errno
8 import io
9 import json
10 import os
11 import platform
12 import re
13 import shutil
14 import subprocess
15 import socket
16 import sys
17 import time
18 import traceback
19
20 if os.name == 'nt':
21 import ctypes
22
23 from .utils import (
24 compat_cookiejar,
25 compat_http_client,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 get_term_width,
38 locked_file,
39 make_HTTPS_handler,
40 MaxDownloadsReached,
41 PostProcessingError,
42 platform_name,
43 preferredencoding,
44 SameFileError,
45 sanitize_filename,
46 subtitles_filename,
47 takewhile_inclusive,
48 UnavailableVideoError,
49 write_json_file,
50 write_string,
51 YoutubeDLHandler,
52 )
53 from .extractor import get_info_extractor, gen_extractors
54 from .FileDownloader import FileDownloader
55 from .version import __version__
56
57
58 class YoutubeDL(object):
59 """YoutubeDL class.
60
61 YoutubeDL objects are the ones responsible of downloading the
62 actual video file and writing it to disk if the user has requested
63 it, among some other tasks. In most cases there should be one per
64 program. As, given a video URL, the downloader doesn't know how to
65 extract all the needed information, task that InfoExtractors do, it
66 has to pass the URL to one of them.
67
68 For this, YoutubeDL objects have a method that allows
69 InfoExtractors to be registered in a given order. When it is passed
70 a URL, the YoutubeDL object handles it to the first InfoExtractor it
71 finds that reports being able to handle it. The InfoExtractor extracts
72 all the information about the video or videos the URL refers to, and
73 YoutubeDL process the extracted information, possibly using a File
74 Downloader to download the video.
75
76 YoutubeDL objects accept a lot of parameters. In order not to saturate
77 the object constructor with arguments, it receives a dictionary of
78 options instead. These options are available through the params
79 attribute for the InfoExtractors to use. The YoutubeDL also
80 registers itself as the downloader in charge for the InfoExtractors
81 that are added to it, so this is a "mutual registration".
82
83 Available options:
84
85 username: Username for authentication purposes.
86 password: Password for authentication purposes.
87 videopassword: Password for acces a video.
88 usenetrc: Use netrc for authentication instead.
89 verbose: Print additional info to stdout.
90 quiet: Do not print messages to stdout.
91 forceurl: Force printing final URL.
92 forcetitle: Force printing title.
93 forceid: Force printing ID.
94 forcethumbnail: Force printing thumbnail URL.
95 forcedescription: Force printing description.
96 forcefilename: Force printing final filename.
97 forcejson: Force printing info_dict as JSON.
98 simulate: Do not download the video files.
99 format: Video format code.
100 format_limit: Highest quality format to try.
101 outtmpl: Template for output names.
102 restrictfilenames: Do not allow "&" and spaces in file names
103 ignoreerrors: Do not stop on download errors.
104 nooverwrites: Prevent overwriting files.
105 playliststart: Playlist item to start at.
106 playlistend: Playlist item to end at.
107 matchtitle: Download only matching titles.
108 rejecttitle: Reject downloads for matching titles.
109 logger: Log messages to a logging.Logger instance.
110 logtostderr: Log messages to stderr instead of stdout.
111 writedescription: Write the video description to a .description file
112 writeinfojson: Write the video description to a .info.json file
113 writeannotations: Write the video annotations to a .annotations.xml file
114 writethumbnail: Write the thumbnail image to a file
115 writesubtitles: Write the video subtitles to a file
116 writeautomaticsub: Write the automatic subtitles to a file
117 allsubtitles: Downloads all the subtitles of the video
118 (requires writesubtitles or writeautomaticsub)
119 listsubtitles: Lists all available subtitles for the video
120 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
121 subtitleslangs: List of languages of the subtitles to download
122 keepvideo: Keep the video file after post-processing
123 daterange: A DateRange object, download only if the upload_date is in the range.
124 skip_download: Skip the actual download of the video file
125 cachedir: Location of the cache files in the filesystem.
126 None to disable filesystem cache.
127 noplaylist: Download single video instead of a playlist if in doubt.
128 age_limit: An integer representing the user's age in years.
129 Unsuitable videos for the given age are skipped.
130 download_archive: File name of a file where all downloads are recorded.
131 Videos already present in the file are not downloaded
132 again.
133 cookiefile: File name where cookies should be read from and dumped to.
134 nocheckcertificate:Do not verify SSL certificates
135 proxy: URL of the proxy server to use
136 socket_timeout: Time to wait for unresponsive hosts, in seconds
137 bidi_workaround: Work around buggy terminals without bidirectional text
138 support, using fridibi
139
140 The following parameters are not used by YoutubeDL itself, they are used by
141 the FileDownloader:
142 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
143 noresizebuffer, retries, continuedl, noprogress, consoletitle
144 """
145
146 params = None
147 _ies = []
148 _pps = []
149 _download_retcode = None
150 _num_downloads = None
151 _screen_file = None
152
def __init__(self, params=None):
    """Create a FileDownloader object with the given options.

    params: dictionary of options (see the class docstring). May be
    None, in which case an empty dict is used.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # BUG FIX: normalize params *before* any use. The previous code read
    # `params.get(...)` (and `params['restrictfilenames']`) before this
    # fallback, so calling YoutubeDL() with the documented default of
    # None crashed with AttributeError/KeyError.
    self.params = {} if params is None else params
    self._screen_file = [sys.stdout, sys.stderr][self.params.get('logtostderr', False)]
    self._err_file = sys.stderr

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            # Pipe our output through fribidi so RTL text renders
            # correctly on terminals without bidi support.
            self._fribidi = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args,
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            self._fribidi_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:  # fribidi binary not installed
                self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames')):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all characters. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
202
def add_info_extractor(self, ie):
    """Append an InfoExtractor to the extractor chain and register
    this downloader with it (the "mutual registration")."""
    ie.set_downloader(self)
    self._ies_instances[ie.ie_key()] = ie
    self._ies.append(ie)
208
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key; on a cache miss,
    instantiate it via the module-level get_info_extractor factory,
    register it, and return the fresh instance.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    fresh = get_info_extractor(ie_key)()
    self.add_info_extractor(fresh)
    return fresh
220
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors, in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
227
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and hand it this downloader."""
    pp.set_downloader(self)
    self._pps.append(pp)
232
def _bidi_workaround(self, message):
    """Pipe *message* through the fribidi subprocess (when --bidi-workaround
    set it up) and return the visually reordered text; otherwise return the
    message unchanged."""
    if not hasattr(self, '_fribidi_channel'):
        return message

    assert type(message) == type(u'')
    line_count = message.count(u'\n') + 1
    self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
    self._fribidi.stdin.flush()
    lines = [self._fribidi_channel.readline().decode('utf-8')
             for _ in range(line_count)]
    # fribidi echoes the trailing newline we appended; strip it back off
    return u''.join(lines)[:-1]
244
def to_screen(self, message, skip_eol=False):
    """Print *message* to the screen, suppressed in quiet mode."""
    return self.to_stdout(message, skip_eol, check_quiet=True)
248
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Write *message* to the screen file, or hand it to the configured
    logger. When check_quiet is true and quiet mode is on, do nothing."""
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    output = message if skip_eol else message + u'\n'
    write_string(output, self._screen_file)
259
def to_stderr(self, message):
    """Write *message* plus a newline to stderr, or hand it to the
    configured logger's error channel."""
    assert type(message) == type(u'')
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
    else:
        write_string(self._bidi_workaround(message) + u'\n', self._err_file)
269
def to_console_title(self, message):
    """Set the console/terminal window title, when consoletitle is on."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm escape sequence: set window title
        write_string(u'\033]0;%s\007' % message, self._screen_file)
279
def save_console_title(self):
    """Push the current terminal title onto the xterm title stack."""
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
286
def restore_console_title(self):
    """Pop the previously saved terminal title off the xterm title stack."""
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
293
def __enter__(self):
    """Context-manager entry: remember the console title, return self."""
    self.save_console_title()
    return self
297
def __exit__(self, *args):
    """Context-manager exit: restore the console title and, when a
    cookiefile is configured, persist the cookie jar to disk."""
    self.restore_console_title()
    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
303
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = u''
                # ExtractorError carries the original exception in .exc_info;
                # include that nested traceback first when present
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                # Not inside an exception handler: show where we were called from
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        # Prefer the nested extractor exc_info (if any) so DownloadError
        # points at the root cause rather than the wrapper
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # ignoreerrors: swallow the problem but remember it in the exit code
    self._download_retcode = 1
333
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
345
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
357
def report_writedescription(self, descfn):
    """Announce that the description file is being written."""
    self.to_screen(u'[info] Writing video description to: ' + descfn)
361
def report_writesubtitles(self, sub_filename):
    """Announce that a subtitles file is being written."""
    self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
365
def report_writeinfojson(self, infofn):
    """Announce that the metadata JSON file is being written."""
    self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
369
def report_writeannotations(self, annofn):
    """Announce that the annotations file has been written."""
    self.to_screen(u'[info] Writing video annotations to: ' + annofn)
373
def report_file_already_downloaded(self, file_name):
    """Report that the file was already fully downloaded."""
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The filename may not be representable in the console encoding
        self.to_screen(u'[download] The file has already been downloaded')
380
def increment_downloads(self):
    """Bump the ordinal used to number downloaded files (%(autonumber)s)."""
    self._num_downloads = self._num_downloads + 1
384
def prepare_filename(self, info_dict):
    """Generate the output filename from the outtmpl template.

    Fields missing from the info dict render as u'NA'; returns None
    when the template itself is malformed.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        def clean(key, value):
            # None becomes the literal 'NA'; ids may keep more characters
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == u'id'))

        fields = dict((k, clean(k, v)) for k, v in fields.items())
        # Unknown template fields render as 'NA' instead of raising KeyError
        fields = collections.defaultdict(lambda: u'NA', fields)

        template = os.path.expanduser(self.params['outtmpl'])
        return template % fields
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
413
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """

    # A human-readable handle for messages; the title may be absent when
    # we're only evaluating a bare playlist entry, so fall back to the id.
    video_title = info_dict.get('title', info_dict.get('id', u'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # BUG FIX: this used the local `title`, which is unbound when the
            # info dict has no 'title' key, raising NameError instead of
            # skipping the age-restricted video.
            return u'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % video_title)
    return None
441
442 @staticmethod
443 def add_extra_info(info_dict, extra_info):
444 '''Set the keys from extra_info in info dict if they are missing'''
445 for key, value in extra_info.items():
446 info_dict.setdefault(key, value)
447
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    NOTE: extra_info has a shared mutable default, but it is only read
    here and passed along, never mutated, so that is safe.
    '''

    if ie_key:
        # Caller pinned a specific extractor: skip the suitability scan
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            # Record which extractor produced this result (only fills
            # keys the extractor did not set itself)
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'extractor_key': ie.ie_key(),
                })
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            # With ignoreerrors, report and move on; otherwise propagate
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # for/else: no extractor claimed the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
500
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Dispatches on ie_result['_type']: 'video', 'url', 'url_transparent',
    'playlist' or 'compat_list'. Recurses until everything bottoms out
    in concrete video results.
    """

    result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Merge: keep the embedding page's metadata, but take the
            # technical fields (url, formats, ...) from the embedded result
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'urlhandle', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # Honor --playlist-start/--playlist-end (1-based, inclusive)
        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            # Apply title/date/age filters before doing any work
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
605
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats (sorted worst→best).

    'best' or None selects the last entry, 'worst' the first; a known
    container extension matches on 'ext', anything else on 'format_id'
    (last match wins). Returns None when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec in (u'mp4', u'flv', u'webm', u'3gp'):
        match_key = 'ext'
    else:
        match_key = 'format_id'

    chosen = None
    for candidate in available_formats:
        if candidate[match_key] == format_spec:
            chosen = candidate
    return chosen
621
def process_video_result(self, info_dict, download=True):
    """Resolve the formats of a single video result, pick the requested
    one(s) per --format, and download them when 'download' is true.
    Returns info_dict updated with the chosen (best) format."""
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, format) in enumerate(formats):
        if format.get('format_id') is None:
            # Fall back to the position in the list as the id
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = u'{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    if self.params.get('listformats', None):
        # --list-formats: print and stop (note: returns None, not info_dict)
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # Keep formats up to and including the limit (list is worst→best)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
706
def process_info(self, info_dict):
    """Process a single resolved IE result: apply filters, honor the
    force-print options, write sidecar files (description, annotations,
    subtitles, info JSON, thumbnail), download the media, run the
    postprocessors and record the download in the archive."""

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        # prepare_filename already reported the template error
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            # description missing or None: not fatal
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # BUG FIX: this previously reported `descfn` (the description
                # filename), which is unrelated and may even be unbound here.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # urlhandle is a live object and not JSON-serializable
            json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # Thumbnail failure is never fatal
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
865
866 def download(self, url_list):
867 """Download a given list of URLs."""
868 if (len(url_list) > 1 and
869 '%' not in self.params['outtmpl']
870 and self.params.get('max_downloads') != 1):
871 raise SameFileError(self.params['outtmpl'])
872
873 for url in url_list:
874 try:
875 #It also downloads the videos
876 self.extract_info(url)
877 except UnavailableVideoError:
878 self.report_error(u'unable to download video')
879 except MaxDownloadsReached:
880 self.to_screen(u'[info] Maximum number of downloaded files reached.')
881 raise
882
883 return self._download_retcode
884
885 def download_with_info_file(self, info_filename):
886 with io.open(info_filename, 'r', encoding='utf-8') as f:
887 info = json.load(f)
888 try:
889 self.process_ie_result(info, download=True)
890 except DownloadError:
891 webpage_url = info.get('webpage_url')
892 if webpage_url is not None:
893 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
894 return self.download([webpage_url])
895 else:
896 raise
897 return self._download_retcode
898
899 def post_process(self, filename, ie_info):
900 """Run all the postprocessors on the given file."""
901 info = dict(ie_info)
902 info['filepath'] = filename
903 keep_video = None
904 for pp in self._pps:
905 try:
906 keep_video_wish, new_info = pp.run(info)
907 if keep_video_wish is not None:
908 if keep_video_wish:
909 keep_video = keep_video_wish
910 elif keep_video is None:
911 # No clear decision yet, let IE decide
912 keep_video = keep_video_wish
913 except PostProcessingError as e:
914 self.report_error(e.msg)
915 if keep_video is False and not self.params.get('keepvideo', False):
916 try:
917 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
918 os.remove(encodeFilename(filename))
919 except (IOError, OSError):
920 self.report_warning(u'Unable to remove downloaded video file')
921
922 def _make_archive_id(self, info_dict):
923 # Future-proof against any change in case
924 # and backwards compatibility with prior versions
925 extractor = info_dict.get('extractor_key')
926 if extractor is None:
927 if 'id' in info_dict:
928 extractor = info_dict.get('ie_key') # key in a playlist
929 if extractor is None:
930 return None # Incomplete video information
931 return extractor.lower() + u' ' + info_dict['id']
932
933 def in_download_archive(self, info_dict):
934 fn = self.params.get('download_archive')
935 if fn is None:
936 return False
937
938 vid_id = self._make_archive_id(info_dict)
939 if vid_id is None:
940 return False # Incomplete video information
941
942 try:
943 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
944 for line in archive_file:
945 if line.strip() == vid_id:
946 return True
947 except IOError as ioe:
948 if ioe.errno != errno.ENOENT:
949 raise
950 return False
951
952 def record_download_archive(self, info_dict):
953 fn = self.params.get('download_archive')
954 if fn is None:
955 return
956 vid_id = self._make_archive_id(info_dict)
957 assert vid_id
958 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
959 archive_file.write(vid_id + u'\n')
960
961 @staticmethod
962 def format_resolution(format, default='unknown'):
963 if format.get('vcodec') == 'none':
964 return 'audio only'
965 if format.get('_resolution') is not None:
966 return format['_resolution']
967 if format.get('height') is not None:
968 if format.get('width') is not None:
969 res = u'%sx%s' % (format['width'], format['height'])
970 else:
971 res = u'%sp' % format['height']
972 else:
973 res = default
974 return res
975
    def list_formats(self, info_dict):
        """Print a table of the available formats for this video."""
        def format_note(fdict):
            # Build the free-text "note" column: format_note, codecs,
            # bitrates and filesize, comma-separated where present.
            res = u''
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
                # Video bitrate known but codec unknown.
                res += u'video'
            if fdict.get('vbr') is not None:
                res += u'@%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if res:
                    res += u', '
                res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
                if res:
                    res += u', '
                res += 'audio'
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                if res:
                    res += u', '
                res += format_bytes(fdict['filesize'])
            return res

        def line(format, idlen=20):
            # One table row: format id (padded to idlen), ext, resolution, note.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                format['ext'],
                self.format_resolution(format),
                format_note(format),
            ))

        # A bare info_dict (no 'formats' list) is treated as its single format.
        formats = info_dict.get('formats', [info_dict])
        # Column width: at least as wide as the header text.
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are assumed sorted worst-first — TODO confirm with caller.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1025
1026 def urlopen(self, req):
1027 """ Start an HTTP download """
1028 return self._opener.open(req)
1029
1030 def print_debug_header(self):
1031 if not self.params.get('verbose'):
1032 return
1033 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1034 try:
1035 sp = subprocess.Popen(
1036 ['git', 'rev-parse', '--short', 'HEAD'],
1037 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1038 cwd=os.path.dirname(os.path.abspath(__file__)))
1039 out, err = sp.communicate()
1040 out = out.decode().strip()
1041 if re.match('[0-9a-f]+', out):
1042 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1043 except:
1044 try:
1045 sys.exc_clear()
1046 except:
1047 pass
1048 write_string(u'[debug] Python version %s - %s' %
1049 (platform.python_version(), platform_name()) + u'\n')
1050
1051 proxy_map = {}
1052 for handler in self._opener.handlers:
1053 if hasattr(handler, 'proxies'):
1054 proxy_map.update(handler.proxies)
1055 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1056
    def _setup_opener(self):
        """Create the urllib opener used for all HTTP(S) requests.

        Wires together cookie handling, proxy configuration and the
        custom HTTPS/YoutubeDL handlers, stores the opener on
        self._opener, then installs it globally and sets the default
        socket timeout.
        """
        timeout_val = self.params.get('socket_timeout')
        # Fall back to a generous 10-minute timeout when none was given.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Load existing cookies only if the file is readable;
            # a missing file will be created on save.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # --proxy "" explicitly disables all proxies.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No explicit proxy: honour the environment (http_proxy etc.).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)