]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Fix thumbnail filename determination (Fixes #1945)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import collections
7 import errno
8 import io
9 import json
10 import os
11 import platform
12 import re
13 import shutil
14 import subprocess
15 import socket
16 import sys
17 import time
18 import traceback
19
20 if os.name == 'nt':
21 import ctypes
22
23 from .utils import (
24 compat_cookiejar,
25 compat_http_client,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 get_term_width,
38 locked_file,
39 make_HTTPS_handler,
40 MaxDownloadsReached,
41 PostProcessingError,
42 platform_name,
43 preferredencoding,
44 SameFileError,
45 sanitize_filename,
46 subtitles_filename,
47 takewhile_inclusive,
48 UnavailableVideoError,
49 write_json_file,
50 write_string,
51 YoutubeDLHandler,
52 )
53 from .extractor import get_info_extractor, gen_extractors
54 from .FileDownloader import FileDownloader
55 from .version import __version__
56
57
58 class YoutubeDL(object):
59 """YoutubeDL class.
60
61 YoutubeDL objects are the ones responsible of downloading the
62 actual video file and writing it to disk if the user has requested
63 it, among some other tasks. In most cases there should be one per
64 program. As, given a video URL, the downloader doesn't know how to
65 extract all the needed information, task that InfoExtractors do, it
66 has to pass the URL to one of them.
67
68 For this, YoutubeDL objects have a method that allows
69 InfoExtractors to be registered in a given order. When it is passed
70 a URL, the YoutubeDL object handles it to the first InfoExtractor it
71 finds that reports being able to handle it. The InfoExtractor extracts
72 all the information about the video or videos the URL refers to, and
73 YoutubeDL process the extracted information, possibly using a File
74 Downloader to download the video.
75
76 YoutubeDL objects accept a lot of parameters. In order not to saturate
77 the object constructor with arguments, it receives a dictionary of
78 options instead. These options are available through the params
79 attribute for the InfoExtractors to use. The YoutubeDL also
80 registers itself as the downloader in charge for the InfoExtractors
81 that are added to it, so this is a "mutual registration".
82
83 Available options:
84
85 username: Username for authentication purposes.
86 password: Password for authentication purposes.
87 videopassword: Password for accessing a video.
88 usenetrc: Use netrc for authentication instead.
89 verbose: Print additional info to stdout.
90 quiet: Do not print messages to stdout.
91 forceurl: Force printing final URL.
92 forcetitle: Force printing title.
93 forceid: Force printing ID.
94 forcethumbnail: Force printing thumbnail URL.
95 forcedescription: Force printing description.
96 forcefilename: Force printing final filename.
97 forcejson: Force printing info_dict as JSON.
98 simulate: Do not download the video files.
99 format: Video format code.
100 format_limit: Highest quality format to try.
101 outtmpl: Template for output names.
102 restrictfilenames: Do not allow "&" and spaces in file names
103 ignoreerrors: Do not stop on download errors.
104 nooverwrites: Prevent overwriting files.
105 playliststart: Playlist item to start at.
106 playlistend: Playlist item to end at.
107 matchtitle: Download only matching titles.
108 rejecttitle: Reject downloads for matching titles.
109 logger: Log messages to a logging.Logger instance.
110 logtostderr: Log messages to stderr instead of stdout.
111 writedescription: Write the video description to a .description file
112 writeinfojson: Write the video description to a .info.json file
113 writeannotations: Write the video annotations to a .annotations.xml file
114 writethumbnail: Write the thumbnail image to a file
115 writesubtitles: Write the video subtitles to a file
116 writeautomaticsub: Write the automatic subtitles to a file
117 allsubtitles: Downloads all the subtitles of the video
118 (requires writesubtitles or writeautomaticsub)
119 listsubtitles: Lists all available subtitles for the video
120 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
121 subtitleslangs: List of languages of the subtitles to download
122 keepvideo: Keep the video file after post-processing
123 daterange: A DateRange object, download only if the upload_date is in the range.
124 skip_download: Skip the actual download of the video file
125 cachedir: Location of the cache files in the filesystem.
126 None to disable filesystem cache.
127 noplaylist: Download single video instead of a playlist if in doubt.
128 age_limit: An integer representing the user's age in years.
129 Unsuitable videos for the given age are skipped.
130 download_archive: File name of a file where all downloads are recorded.
131 Videos already present in the file are not downloaded
132 again.
133 cookiefile: File name where cookies should be read from and dumped to.
134 nocheckcertificate:Do not verify SSL certificates
135 proxy: URL of the proxy server to use
136 socket_timeout: Time to wait for unresponsive hosts, in seconds
137 bidi_workaround: Work around buggy terminals without bidirectional text
138 support, using fribidi
139
140 The following parameters are not used by YoutubeDL itself, they are used by
141 the FileDownloader:
142 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
143 noresizebuffer, retries, continuedl, noprogress, consoletitle
144 """
145
    # Class-level defaults; every one of these is shadowed by an instance
    # attribute assigned in __init__.
    params = None  # Option dictionary (see the class docstring)
    _ies = []  # Registered InfoExtractor instances, in registration order
    _pps = []  # Registered PostProcessor chain
    _download_retcode = None  # Return code accumulated over all downloads
    _num_downloads = None  # Ordinal used for the %(autonumber)s template key
    _screen_file = None  # File object that normal (non-error) output goes to
152
153 def __init__(self, params=None):
154 """Create a FileDownloader object with the given options."""
155 self._ies = []
156 self._ies_instances = {}
157 self._pps = []
158 self._progress_hooks = []
159 self._download_retcode = 0
160 self._num_downloads = 0
161 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
162 self._err_file = sys.stderr
163 self.params = {} if params is None else params
164
165 if params.get('bidi_workaround', False):
166 try:
167 import pty
168 master, slave = pty.openpty()
169 width = get_term_width()
170 if width is None:
171 width_args = []
172 else:
173 width_args = ['-w', str(width)]
174 self._fribidi = subprocess.Popen(
175 ['fribidi', '-c', 'UTF-8'] + width_args,
176 stdin=subprocess.PIPE,
177 stdout=slave,
178 stderr=self._err_file)
179 self._fribidi_channel = os.fdopen(master, 'rb')
180 except OSError as ose:
181 if ose.errno == 2:
182 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
183 else:
184 raise
185
186 if (sys.version_info >= (3,) and sys.platform != 'win32' and
187 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
188 and not params['restrictfilenames']):
189 # On Python 3, the Unicode filesystem API will throw errors (#1474)
190 self.report_warning(
191 u'Assuming --restrict-filenames since file system encoding '
192 u'cannot encode all charactes. '
193 u'Set the LC_ALL environment variable to fix this.')
194 self.params['restrictfilenames'] = True
195
196 self.fd = FileDownloader(self, self.params)
197
198 if '%(stitle)s' in self.params.get('outtmpl', ''):
199 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
200
201 self._setup_opener()
202
203 def add_info_extractor(self, ie):
204 """Add an InfoExtractor object to the end of the list."""
205 self._ies.append(ie)
206 self._ies_instances[ie.ie_key()] = ie
207 ie.set_downloader(self)
208
209 def get_info_extractor(self, ie_key):
210 """
211 Get an instance of an IE with name ie_key, it will try to get one from
212 the _ies list, if there's no instance it will create a new one and add
213 it to the extractor list.
214 """
215 ie = self._ies_instances.get(ie_key)
216 if ie is None:
217 ie = get_info_extractor(ie_key)()
218 self.add_info_extractor(ie)
219 return ie
220
221 def add_default_info_extractors(self):
222 """
223 Add the InfoExtractors returned by gen_extractors to the end of the list
224 """
225 for ie in gen_extractors():
226 self.add_info_extractor(ie)
227
228 def add_post_processor(self, pp):
229 """Add a PostProcessor object to the end of the chain."""
230 self._pps.append(pp)
231 pp.set_downloader(self)
232
233 def _bidi_workaround(self, message):
234 if not hasattr(self, '_fribidi_channel'):
235 return message
236
237 assert type(message) == type(u'')
238 line_count = message.count(u'\n') + 1
239 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
240 self._fribidi.stdin.flush()
241 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
242 for _ in range(line_count))
243 return res[:-len(u'\n')]
244
245 def to_screen(self, message, skip_eol=False):
246 """Print message to stdout if not in quiet mode."""
247 return self.to_stdout(message, skip_eol, check_quiet=True)
248
249 def to_stdout(self, message, skip_eol=False, check_quiet=False):
250 """Print message to stdout if not in quiet mode."""
251 if self.params.get('logger'):
252 self.params['logger'].debug(message)
253 elif not check_quiet or not self.params.get('quiet', False):
254 message = self._bidi_workaround(message)
255 terminator = [u'\n', u''][skip_eol]
256 output = message + terminator
257
258 write_string(output, self._screen_file)
259
260 def to_stderr(self, message):
261 """Print message to stderr."""
262 assert type(message) == type(u'')
263 if self.params.get('logger'):
264 self.params['logger'].error(message)
265 else:
266 message = self._bidi_workaround(message)
267 output = message + u'\n'
268 write_string(output, self._err_file)
269
    def to_console_title(self, message):
        # Set the terminal/console window title to `message`, but only
        # when the consoletitle option is enabled.
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # OSC 0 escape sequence: set icon name and window title.
            write_string(u'\033]0;%s\007' % message, self._screen_file)
279
280 def save_console_title(self):
281 if not self.params.get('consoletitle', False):
282 return
283 if 'TERM' in os.environ:
284 # Save the title on stack
285 write_string(u'\033[22;0t', self._screen_file)
286
287 def restore_console_title(self):
288 if not self.params.get('consoletitle', False):
289 return
290 if 'TERM' in os.environ:
291 # Restore the title from stack
292 write_string(u'\033[23;0t', self._screen_file)
293
    def __enter__(self):
        # Context-manager entry: remember the terminal title so that
        # __exit__ can restore it, then hand back this instance.
        self.save_console_title()
        return self
297
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and, if a
        # cookie file was configured, persist the cookie jar to disk.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
303
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        When errors are not ignored, raises DownloadError; the original
        exception info (if any) is attached so callers can re-raise it.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    # ExtractorError carries the original exception in its
                    # own exc_info attribute; show that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info (see above) over the
            # currently handled one when building the DownloadError.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
333
334 def report_warning(self, message):
335 '''
336 Print the message to stderr, it will be prefixed with 'WARNING:'
337 If stderr is a tty file the 'WARNING:' will be colored
338 '''
339 if self._err_file.isatty() and os.name != 'nt':
340 _msg_header = u'\033[0;33mWARNING:\033[0m'
341 else:
342 _msg_header = u'WARNING:'
343 warning_message = u'%s %s' % (_msg_header, message)
344 self.to_stderr(warning_message)
345
346 def report_error(self, message, tb=None):
347 '''
348 Do the same as trouble, but prefixes the message with 'ERROR:', colored
349 in red if stderr is a tty file.
350 '''
351 if self._err_file.isatty() and os.name != 'nt':
352 _msg_header = u'\033[0;31mERROR:\033[0m'
353 else:
354 _msg_header = u'ERROR:'
355 error_message = u'%s %s' % (_msg_header, message)
356 self.trouble(error_message, tb)
357
358 def report_writedescription(self, descfn):
359 """ Report that the description file is being written """
360 self.to_screen(u'[info] Writing video description to: ' + descfn)
361
362 def report_writesubtitles(self, sub_filename):
363 """ Report that the subtitles file is being written """
364 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
365
366 def report_writeinfojson(self, infofn):
367 """ Report that the metadata file has been written """
368 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
369
370 def report_writeannotations(self, annofn):
371 """ Report that the annotations file has been written. """
372 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
373
374 def report_file_already_downloaded(self, file_name):
375 """Report file has already been fully downloaded."""
376 try:
377 self.to_screen(u'[download] %s has already been downloaded' % file_name)
378 except UnicodeEncodeError:
379 self.to_screen(u'[download] The file has already been downloaded')
380
381 def increment_downloads(self):
382 """Increment the ordinal that assigns a number to each file."""
383 self._num_downloads += 1
384
385 def prepare_filename(self, info_dict):
386 """Generate the output filename."""
387 try:
388 template_dict = dict(info_dict)
389
390 template_dict['epoch'] = int(time.time())
391 autonumber_size = self.params.get('autonumber_size')
392 if autonumber_size is None:
393 autonumber_size = 5
394 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
395 template_dict['autonumber'] = autonumber_templ % self._num_downloads
396 if template_dict.get('playlist_index') is not None:
397 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
398
399 sanitize = lambda k, v: sanitize_filename(
400 compat_str(v),
401 restricted=self.params.get('restrictfilenames'),
402 is_id=(k == u'id'))
403 template_dict = dict((k, sanitize(k, v))
404 for k, v in template_dict.items()
405 if v is not None)
406 template_dict = collections.defaultdict(lambda: u'NA', template_dict)
407
408 tmpl = os.path.expanduser(self.params['outtmpl'])
409 filename = tmpl % template_dict
410 return filename
411 except ValueError as err:
412 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
413 return None
414
415 def _match_entry(self, info_dict):
416 """ Returns None iff the file should be downloaded """
417
418 if 'title' in info_dict:
419 # This can happen when we're just evaluating the playlist
420 title = info_dict['title']
421 matchtitle = self.params.get('matchtitle', False)
422 if matchtitle:
423 if not re.search(matchtitle, title, re.IGNORECASE):
424 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
425 rejecttitle = self.params.get('rejecttitle', False)
426 if rejecttitle:
427 if re.search(rejecttitle, title, re.IGNORECASE):
428 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
429 date = info_dict.get('upload_date', None)
430 if date is not None:
431 dateRange = self.params.get('daterange', DateRange())
432 if date not in dateRange:
433 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
434 age_limit = self.params.get('age_limit')
435 if age_limit is not None:
436 if age_limit < info_dict.get('age_limit', 0):
437 return u'Skipping "' + title + '" because it is age restricted'
438 if self.in_download_archive(info_dict):
439 return (u'%s has already been recorded in archive'
440 % info_dict.get('title', info_dict.get('id', u'video')))
441 return None
442
443 @staticmethod
444 def add_extra_info(info_dict, extra_info):
445 '''Set the keys from extra_info in info dict if they are missing'''
446 for key, value in extra_info.items():
447 info_dict.setdefault(key, value)
448
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key, if given, forces the use of that single extractor instead
        of probing every registered one in order. When process is false,
        the raw extractor result is returned without resolving playlists
        or url references.
        '''
        # NOTE(review): extra_info uses a mutable default argument; it is
        # only read here and passed through, never mutated — confirm.

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                # Record which extractor produced the result and from where.
                self.add_extra_info(ie_result,
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
                        'extractor_key': ie.ie_key(),
                    })
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: the loop ran out without any extractor claiming
            # the URL.
            self.report_error(u'no suitable InfoExtractor: %s' % url)
501
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' (default), 'url',
        'url_transparent', 'playlist' or 'compat_list'.
        """

        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge: keep the embedding page's metadata, but take the
                # listed technical fields from the embedded result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            # Recurse: the merged result may itself be a playlist etc.
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # Apply --playlist-start / --playlist-end slicing
            # (playliststart is 1-based in params, 0-based here).
            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                }

                # Filters (title match, date range, archive, ...) may
                # veto individual entries.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                # Old-style list entries lack extractor bookkeeping; copy
                # it down from the enclosing result.
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
606
607 def select_format(self, format_spec, available_formats):
608 if format_spec == 'best' or format_spec is None:
609 return available_formats[-1]
610 elif format_spec == 'worst':
611 return available_formats[0]
612 else:
613 extensions = [u'mp4', u'flv', u'webm', u'3gp']
614 if format_spec in extensions:
615 filter_f = lambda f: f['ext'] == format_spec
616 else:
617 filter_f = lambda f: f['format_id'] == format_spec
618 matches = list(filter(filter_f, available_formats))
619 if matches:
620 return matches[-1]
621 return None
622
    def process_video_result(self, info_dict, download=True):
        """Apply format selection to a resolved 'video' result and, when
        download is true, hand every selected format to process_info().

        Returns info_dict, updated with the best selected format for
        backwards compatibility.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # This extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            # --list-formats: print and stop, nothing gets downloaded.
            self.list_formats(info_dict)
            return

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Keep formats up to and including the limit (the list is
            # ordered worst to best).
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                try:
                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                except ValueError:
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
707
708 def process_info(self, info_dict):
709 """Process a single resolved IE result."""
710
711 assert info_dict.get('_type', 'video') == 'video'
712 #We increment the download the download count here to match the previous behaviour.
713 self.increment_downloads()
714
715 info_dict['fulltitle'] = info_dict['title']
716 if len(info_dict['title']) > 200:
717 info_dict['title'] = info_dict['title'][:197] + u'...'
718
719 # Keep for backwards compatibility
720 info_dict['stitle'] = info_dict['title']
721
722 if not 'format' in info_dict:
723 info_dict['format'] = info_dict['ext']
724
725 reason = self._match_entry(info_dict)
726 if reason is not None:
727 self.to_screen(u'[download] ' + reason)
728 return
729
730 max_downloads = self.params.get('max_downloads')
731 if max_downloads is not None:
732 if self._num_downloads > int(max_downloads):
733 raise MaxDownloadsReached()
734
735 filename = self.prepare_filename(info_dict)
736
737 # Forced printings
738 if self.params.get('forcetitle', False):
739 self.to_stdout(info_dict['fulltitle'])
740 if self.params.get('forceid', False):
741 self.to_stdout(info_dict['id'])
742 if self.params.get('forceurl', False):
743 # For RTMP URLs, also include the playpath
744 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
745 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
746 self.to_stdout(info_dict['thumbnail'])
747 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
748 self.to_stdout(info_dict['description'])
749 if self.params.get('forcefilename', False) and filename is not None:
750 self.to_stdout(filename)
751 if self.params.get('forceformat', False):
752 self.to_stdout(info_dict['format'])
753 if self.params.get('forcejson', False):
754 info_dict['_filename'] = filename
755 self.to_stdout(json.dumps(info_dict))
756
757 # Do nothing else if in simulate mode
758 if self.params.get('simulate', False):
759 return
760
761 if filename is None:
762 return
763
764 try:
765 dn = os.path.dirname(encodeFilename(filename))
766 if dn != '' and not os.path.exists(dn):
767 os.makedirs(dn)
768 except (OSError, IOError) as err:
769 self.report_error(u'unable to create directory ' + compat_str(err))
770 return
771
772 if self.params.get('writedescription', False):
773 try:
774 descfn = filename + u'.description'
775 self.report_writedescription(descfn)
776 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
777 descfile.write(info_dict['description'])
778 except (KeyError, TypeError):
779 self.report_warning(u'There\'s no description to write.')
780 except (OSError, IOError):
781 self.report_error(u'Cannot write description file ' + descfn)
782 return
783
784 if self.params.get('writeannotations', False):
785 try:
786 annofn = filename + u'.annotations.xml'
787 self.report_writeannotations(annofn)
788 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
789 annofile.write(info_dict['annotations'])
790 except (KeyError, TypeError):
791 self.report_warning(u'There are no annotations to write.')
792 except (OSError, IOError):
793 self.report_error(u'Cannot write annotations file: ' + annofn)
794 return
795
796 subtitles_are_requested = any([self.params.get('writesubtitles', False),
797 self.params.get('writeautomaticsub')])
798
799 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
800 # subtitles download errors are already managed as troubles in relevant IE
801 # that way it will silently go on when used with unsupporting IE
802 subtitles = info_dict['subtitles']
803 sub_format = self.params.get('subtitlesformat', 'srt')
804 for sub_lang in subtitles.keys():
805 sub = subtitles[sub_lang]
806 if sub is None:
807 continue
808 try:
809 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
810 self.report_writesubtitles(sub_filename)
811 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
812 subfile.write(sub)
813 except (OSError, IOError):
814 self.report_error(u'Cannot write subtitles file ' + descfn)
815 return
816
817 if self.params.get('writeinfojson', False):
818 infofn = os.path.splitext(filename)[0] + u'.info.json'
819 self.report_writeinfojson(infofn)
820 try:
821 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
822 write_json_file(json_info_dict, encodeFilename(infofn))
823 except (OSError, IOError):
824 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
825 return
826
827 if self.params.get('writethumbnail', False):
828 if info_dict.get('thumbnail') is not None:
829 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
830 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
831 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
832 (info_dict['extractor'], info_dict['id']))
833 try:
834 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
835 with open(thumb_filename, 'wb') as thumbf:
836 shutil.copyfileobj(uf, thumbf)
837 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
838 (info_dict['extractor'], info_dict['id'], thumb_filename))
839 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
840 self.report_warning(u'Unable to download thumbnail "%s": %s' %
841 (info_dict['thumbnail'], compat_str(err)))
842
843 if not self.params.get('skip_download', False):
844 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
845 success = True
846 else:
847 try:
848 success = self.fd._do_download(filename, info_dict)
849 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
850 self.report_error(u'unable to download video data: %s' % str(err))
851 return
852 except (OSError, IOError) as err:
853 raise UnavailableVideoError(err)
854 except (ContentTooShortError, ) as err:
855 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
856 return
857
858 if success:
859 try:
860 self.post_process(filename, info_dict)
861 except (PostProcessingError) as err:
862 self.report_error(u'postprocessing: %s' % str(err))
863 return
864
865 self.record_download_archive(info_dict)
866
867 def download(self, url_list):
868 """Download a given list of URLs."""
869 if (len(url_list) > 1 and
870 '%' not in self.params['outtmpl']
871 and self.params.get('max_downloads') != 1):
872 raise SameFileError(self.params['outtmpl'])
873
874 for url in url_list:
875 try:
876 #It also downloads the videos
877 self.extract_info(url)
878 except UnavailableVideoError:
879 self.report_error(u'unable to download video')
880 except MaxDownloadsReached:
881 self.to_screen(u'[info] Maximum number of downloaded files reached.')
882 raise
883
884 return self._download_retcode
885
886 def download_with_info_file(self, info_filename):
887 with io.open(info_filename, 'r', encoding='utf-8') as f:
888 info = json.load(f)
889 try:
890 self.process_ie_result(info, download=True)
891 except DownloadError:
892 webpage_url = info.get('webpage_url')
893 if webpage_url is not None:
894 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
895 return self.download([webpage_url])
896 else:
897 raise
898 return self._download_retcode
899
900 def post_process(self, filename, ie_info):
901 """Run all the postprocessors on the given file."""
902 info = dict(ie_info)
903 info['filepath'] = filename
904 keep_video = None
905 for pp in self._pps:
906 try:
907 keep_video_wish, new_info = pp.run(info)
908 if keep_video_wish is not None:
909 if keep_video_wish:
910 keep_video = keep_video_wish
911 elif keep_video is None:
912 # No clear decision yet, let IE decide
913 keep_video = keep_video_wish
914 except PostProcessingError as e:
915 self.report_error(e.msg)
916 if keep_video is False and not self.params.get('keepvideo', False):
917 try:
918 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
919 os.remove(encodeFilename(filename))
920 except (IOError, OSError):
921 self.report_warning(u'Unable to remove downloaded video file')
922
923 def _make_archive_id(self, info_dict):
924 # Future-proof against any change in case
925 # and backwards compatibility with prior versions
926 extractor = info_dict.get('extractor_key')
927 if extractor is None:
928 if 'id' in info_dict:
929 extractor = info_dict.get('ie_key') # key in a playlist
930 if extractor is None:
931 return None # Incomplete video information
932 return extractor.lower() + u' ' + info_dict['id']
933
934 def in_download_archive(self, info_dict):
935 fn = self.params.get('download_archive')
936 if fn is None:
937 return False
938
939 vid_id = self._make_archive_id(info_dict)
940 if vid_id is None:
941 return False # Incomplete video information
942
943 try:
944 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
945 for line in archive_file:
946 if line.strip() == vid_id:
947 return True
948 except IOError as ioe:
949 if ioe.errno != errno.ENOENT:
950 raise
951 return False
952
953 def record_download_archive(self, info_dict):
954 fn = self.params.get('download_archive')
955 if fn is None:
956 return
957 vid_id = self._make_archive_id(info_dict)
958 assert vid_id
959 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
960 archive_file.write(vid_id + u'\n')
961
962 @staticmethod
963 def format_resolution(format, default='unknown'):
964 if format.get('vcodec') == 'none':
965 return 'audio only'
966 if format.get('_resolution') is not None:
967 return format['_resolution']
968 if format.get('height') is not None:
969 if format.get('width') is not None:
970 res = u'%sx%s' % (format['width'], format['height'])
971 else:
972 res = u'%sp' % format['height']
973 else:
974 res = default
975 return res
976
977 def list_formats(self, info_dict):
978 def format_note(fdict):
979 res = u''
980 if fdict.get('format_note') is not None:
981 res += fdict['format_note'] + u' '
982 if (fdict.get('vcodec') is not None and
983 fdict.get('vcodec') != 'none'):
984 res += u'%-5s' % fdict['vcodec']
985 elif fdict.get('vbr') is not None:
986 res += u'video'
987 if fdict.get('vbr') is not None:
988 res += u'@%4dk' % fdict['vbr']
989 if fdict.get('acodec') is not None:
990 if res:
991 res += u', '
992 res += u'%-5s' % fdict['acodec']
993 elif fdict.get('abr') is not None:
994 if res:
995 res += u', '
996 res += 'audio'
997 if fdict.get('abr') is not None:
998 res += u'@%3dk' % fdict['abr']
999 if fdict.get('filesize') is not None:
1000 if res:
1001 res += u', '
1002 res += format_bytes(fdict['filesize'])
1003 return res
1004
1005 def line(format, idlen=20):
1006 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1007 format['format_id'],
1008 format['ext'],
1009 self.format_resolution(format),
1010 format_note(format),
1011 ))
1012
1013 formats = info_dict.get('formats', [info_dict])
1014 idlen = max(len(u'format code'),
1015 max(len(f['format_id']) for f in formats))
1016 formats_s = [line(f, idlen) for f in formats]
1017 if len(formats) > 1:
1018 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1019 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1020
1021 header_line = line({
1022 'format_id': u'format code', 'ext': u'extension',
1023 '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1024 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1025 (info_dict['id'], header_line, u"\n".join(formats_s)))
1026
1027 def urlopen(self, req):
1028 """ Start an HTTP download """
1029 return self._opener.open(req)
1030
1031 def print_debug_header(self):
1032 if not self.params.get('verbose'):
1033 return
1034 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1035 try:
1036 sp = subprocess.Popen(
1037 ['git', 'rev-parse', '--short', 'HEAD'],
1038 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1039 cwd=os.path.dirname(os.path.abspath(__file__)))
1040 out, err = sp.communicate()
1041 out = out.decode().strip()
1042 if re.match('[0-9a-f]+', out):
1043 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1044 except:
1045 try:
1046 sys.exc_clear()
1047 except:
1048 pass
1049 write_string(u'[debug] Python version %s - %s' %
1050 (platform.python_version(), platform_name()) + u'\n')
1051
1052 proxy_map = {}
1053 for handler in self._opener.handlers:
1054 if hasattr(handler, 'proxies'):
1055 proxy_map.update(handler.proxies)
1056 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1057
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Reads the 'socket_timeout', 'cookiefile', 'proxy' and
        'nocheckcertificate' params, stores the cookie jar on
        ``self.cookiejar`` and the opener on ``self._opener``.

        Side effects: installs the opener globally via
        ``install_opener`` and sets the process-wide default socket
        timeout.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default to a 600-second socket timeout when none was requested.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No persistence requested: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load the file if it already exists and is readable;
            # a fresh cookie file will simply be created on save.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)