]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Merge branch 'master' of github.com:rg3/youtube-dl
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import json
9 import os
10 import platform
11 import re
12 import shutil
13 import subprocess
14 import socket
15 import sys
16 import time
17 import traceback
18
19 if os.name == 'nt':
20 import ctypes
21
22 from .utils import (
23 compat_cookiejar,
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 locked_file,
38 make_HTTPS_handler,
39 MaxDownloadsReached,
40 PostProcessingError,
41 platform_name,
42 preferredencoding,
43 SameFileError,
44 sanitize_filename,
45 subtitles_filename,
46 takewhile_inclusive,
47 UnavailableVideoError,
48 write_json_file,
49 write_string,
50 YoutubeDLHandler,
51 )
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
55
56
57 class YoutubeDL(object):
58 """YoutubeDL class.
59
60 YoutubeDL objects are the ones responsible of downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
66
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object handles it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL process the extracted information, possibly using a File
73 Downloader to download the video.
74
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
81
82 Available options:
83
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
131 again.
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135
136 The following parameters are not used by YoutubeDL itself, they are used by
137 the FileDownloader:
138 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
139 noresizebuffer, retries, continuedl, noprogress, consoletitle
140 """
141
142 params = None
143 _ies = []
144 _pps = []
145 _download_retcode = None
146 _num_downloads = None
147 _screen_file = None
148
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is stored as self.params, handed to the FileDownloader, and may be
    modified in place ('restrictfilenames' is forced on when the file
    system encoding is ASCII-only on Python 3).
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout

    # Must be assigned before the first report_warning(): the reporting
    # helpers read self.params to find the logger.
    self.params = params

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params['outtmpl']:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
176
def add_info_extractor(self, ie):
    """Register an InfoExtractor at the end of the list.

    The extractor is also indexed by its ie_key() and told that this
    object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
182
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    When no instance is registered yet, the extractor class is looked up
    by key, instantiated, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
194
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
201
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and bind it to this downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
206
def to_screen(self, message, skip_eol=False):
    """Emit message on the screen file unless quiet.

    When a logger is configured the message goes to logger.debug()
    instead. skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
215
def to_stderr(self, message):
    """Print message to stderr.

    message must be a unicode string. When a logger is configured the
    message is sent to logger.error() instead of being written.
    """
    assert isinstance(message, type(u''))
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    output = message + u'\n'
    # Decide between bytes and text from the stream that is actually
    # written to (sys.stderr), not from the screen file.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
226
def to_console_title(self, message):
    """Set the console window title to message if 'consoletitle' is on.

    Uses SetConsoleTitleW on Windows consoles and an escape sequence
    written to the screen file when TERM is set.
    """
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            wide_title = ctypes.c_wchar_p(message)
            ctypes.windll.kernel32.SetConsoleTitleW(wide_title)
        elif 'TERM' in os.environ:
            sequence = u'\033]0;%s\007' % message
            write_string(sequence, self._screen_file)
236
def save_console_title(self):
    """Push the current terminal title if 'consoletitle' is on and TERM is set."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
243
def restore_console_title(self):
    """Pop the saved terminal title if 'consoletitle' is on and TERM is set."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
250
def __enter__(self):
    """Enter the context: save the console title and hand back this object."""
    saver = self.save_console_title
    saver()
    return self
254
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is only saved when a 'cookiefile' option is set.
    Exceptions are not suppressed (nothing is returned).
    """
    self.restore_console_title()

    cookiefile = self.params.get('cookiefile')
    if cookiefile is None:
        return
    self.cookiejar.save()
260
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem shows up.

    The message (if any) is printed to stderr. In verbose mode a
    traceback is printed as well: tb when given, otherwise one built
    from the exception being handled or from the current stack.

    Unless 'ignoreerrors' is set a DownloadError is raised; otherwise
    the return code is set to 1 and the method returns normally.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    # The exception wraps another one: show that first.
                    pieces.append(u''.join(traceback.format_exception(*active[1].exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
290
def report_warning(self, message):
    '''
    Send message to stderr with a 'WARNING:' prefix.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (prefix, message))
302
def report_error(self, message, tb=None):
    '''
    Forward to trouble() with the message prefixed by 'ERROR:'.
    The prefix is colored red when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    if colorize:
        prefix = u'\033[0;31mERROR:\033[0m'
    else:
        prefix = u'ERROR:'
    self.trouble(u'%s %s' % (prefix, message), tb)
314
def report_writedescription(self, descfn):
    """ Announce on screen that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
318
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
322
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written """
    # The message previously lacked its verb; it now reads like the
    # other report_write* messages.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
326
def report_writeannotations(self, annofn):
    """ Announce on screen that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
330
def report_file_already_downloaded(self, file_name):
    """Tell the user the file was already fully downloaded.

    Falls back to a message without the file name when printing the
    name raises UnicodeEncodeError.
    """
    with_name = u'[download] %s has already been downloaded'
    without_name = u'[download] The file has already been downloaded'
    try:
        self.to_screen(with_name % file_name)
    except UnicodeEncodeError:
        self.to_screen(without_name)
337
def increment_downloads(self):
    """Advance the counter used to number downloaded files."""
    self._num_downloads = self._num_downloads + 1
341
def prepare_filename(self, info_dict):
    """Build the output filename from the 'outtmpl' template.

    info_dict supplies the template fields; 'epoch' and 'autonumber' are
    added, 'playlist_index' is zero-padded, and every value is passed
    through sanitize_filename (None becomes u'NA').

    Returns the filename, or None after reporting an error when the
    template cannot be expanded.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        counter_fmt = u'%0' + str(width) + u'd'
        fields['autonumber'] = counter_fmt % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def _clean(key, value):
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(text, restricted=restricted, is_id=(key == u'id'))

        fields = dict((key, _clean(key, value)) for key, value in fields.items())

        return os.path.expanduser(self.params['outtmpl']) % fields
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
372
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a unicode string with the reason for skipping the
    entry. The reason carries no '[download] ' prefix: the callers add it.

    Checks, in order: 'matchtitle' / 'rejecttitle' (only when the entry
    has a title), 'daterange' (only when it has an upload_date),
    'age_limit', and the download archive.
    """
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        actual_age_limit = info_dict.get('age_limit')
        if actual_age_limit is None:
            # Treat an explicit None like a missing key: no restriction.
            actual_age_limit = 0
        if age_limit < actual_age_limit:
            # The entry may have no title (e.g. a bare playlist item),
            # so fall back to the id like the archive message does.
            label = info_dict.get('title', info_dict.get('id', u'video'))
            return u'Skipping "' + label + '" because it is age restricted'

    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % info_dict.get('title', info_dict.get('id', u'video')))
    return None
400
401 @staticmethod
402 def add_extra_info(info_dict, extra_info):
403 '''Set the keys from extra_info in info dict if they are missing'''
404 for key, value in extra_info.items():
405 info_dict.setdefault(key, value)
406
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
    '''
    Run the first suitable InfoExtractor on url and process its result.

    If ie_key is given only that extractor is tried, otherwise all the
    registered ones in order. If 'download', the videos are downloaded too.
    extra_info is a dict with extra values to add to each result.

    Returns whatever process_ie_result returns, or None when nothing was
    extracted (no suitable extractor, extractor returned None, or an
    error was reported).
    '''
    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for extractor in candidates:
        if not extractor.suitable(url):
            continue

        if not extractor.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            result = extractor.extract(url)
            if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(result, list):
                # Backwards compatibility: old IE result format
                result = {'_type': 'compat_list', 'entries': result}
            origin = {
                'extractor': extractor.IE_NAME,
                'webpage_url': url,
                'extractor_key': extractor.ie_key(),
            }
            self.add_extra_info(result, origin)
            return self.process_ie_result(result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop finished without any extractor accepting the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
455
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Resolve every unresolved reference (URLs, playlist items) in the
    result of an IE; ie_result may be modified in the process.

    The videos are also downloaded if 'download'.
    Returns the resolved ie_result. Raises Exception for an unknown
    '_type'.
    """
    # If not given we suppose it's a video, support the default old system
    kind = ie_result.get('_type', 'video')

    if kind == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)

    if kind == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)

    if kind == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        # -1 means "up to the last item"
        stop = None if playlistend == -1 else playlistend
        entries = ie_result['entries'][playliststart:stop]
        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for position, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (position, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': position + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            playlist_results.append(
                self.process_ie_result(entry, download=download, extra_info=extra))
        ie_result['entries'] = playlist_results
        return ie_result

    if kind == 'compat_list':
        inherited = ('extractor', 'webpage_url', 'extractor_key')

        def _fixup(r):
            # Old-style entries take their origin from the enclosing result.
            self.add_extra_info(r, dict((key, ie_result[key]) for key in inherited))
            return r

        resolved = []
        for r in ie_result['entries']:
            resolved.append(self.process_ie_result(_fixup(r), download, extra_info))
        ie_result['entries'] = resolved
        return ie_result

    raise Exception('Invalid result type: %s' % kind)
535
def select_format(self, format_spec, available_formats):
    """Pick one format dict out of available_formats.

    format_spec may be 'best' or None (last entry), 'worst' (first
    entry), one of the extensions mp4/flv/webm/3gp (last entry with that
    'ext') or a format id (last entry with that 'format_id').

    Returns the chosen format dict, or None when nothing matches,
    including when available_formats is empty.
    """
    if not available_formats:
        # Nothing to choose from: report "no match" instead of IndexError.
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
551
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video result and process them.

    info_dict is a 'video' result; it is updated in place ('playlist'
    defaults, and the fields of the last selected format). Each format
    gets 'format_id', 'format' and 'ext' filled in when missing.

    Returns info_dict, or None when the 'listformats' option is set
    (the formats are listed instead). Raises ExtractorError when none of
    the requested formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort below every known one. Python 2
            # ordered None before ints implicitly; Python 3 refuses to
            # compare them, so map None to -1 explicitly.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
636
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Increments the download counter, normalises the title fields of
    info_dict in place, applies the match filters and the max_downloads
    limit, does the forced printings, and (unless simulating) writes the
    requested side files, downloads the video, runs the postprocessors
    and records the video in the download archive.

    Returns None. Raises MaxDownloadsReached when the limit is exceeded
    and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            # Missing key, or a description that is not text (e.g. None)
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try so the error handler can name the
            # file that failed (it used to reference the description file).
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the dumped metadata
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Release the connection even if the copy fails.
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # A missing thumbnail is not fatal: warn and carry on.
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
794
def download(self, url_list):
    """Download every URL of url_list and return the process return code.

    Raises SameFileError when several URLs would be written to one fixed
    output name (a template without '%' and max_downloads != 1).
    MaxDownloadsReached is announced on screen and re-raised.
    """
    several = len(url_list) > 1
    if several and '%' not in self.params['outtmpl'] and self.params.get('max_downloads') != 1:
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
813
def post_process(self, filename, ie_info):
    """Run every registered postprocessor on the downloaded file.

    Each postprocessor receives a copy of ie_info with 'filepath' set to
    filename. The original file is deleted afterwards when the
    postprocessors settled on not keeping it and 'keepvideo' is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    keep_video = None
    for pp in self._pps:
        try:
            wish, _new_info = pp.run(info)
            if wish is None:
                # This postprocessor has no opinion.
                continue
            # A wish to keep always wins; a wish to delete only counts
            # while nobody has decided yet.
            if wish or keep_video is None:
                keep_video = wish
        except PostProcessingError as e:
            self.report_error(e.msg)

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
836
def _make_archive_id(self, info_dict):
    """Return the download-archive id of a video, or None.

    The id is '<extractor name, lowercased> <video id>'. The extractor
    name comes from 'extractor', falling back to 'ie_key' (the key used
    for playlist entries). None is returned when the information is
    incomplete, i.e. when the extractor or the video id is unknown.
    """
    video_id = info_dict.get('id')
    if video_id is None:
        return None  # Incomplete video information
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + video_id
847
def in_download_archive(self, info_dict):
    """Tell whether the video is already listed in the download archive.

    Returns False when no 'download_archive' file is configured, when
    the archive id cannot be built, or when the archive file does not
    exist. Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            for entry in archive_file:
                if entry.strip() == wanted:
                    return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
866
def record_download_archive(self, info_dict):
    """Append the video's archive id to the 'download_archive' file.

    Does nothing when no archive file is configured. The archive id must
    be available (asserted).
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        vid_id = self._make_archive_id(info_dict)
        assert vid_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
875
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a printable resolution for a format dictionary.

    An explicit '_resolution' entry takes precedence.  Otherwise the
    result is 'WIDTHxHEIGHT' when both dimensions are known, 'HEIGHTp'
    when only the height is known, and default when the height is missing.
    """
    explicit = format.get('_resolution')
    if explicit is not None:
        return explicit

    height = format.get('height')
    if height is None:
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
888
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are taken from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format.  One row is
    printed per format (format code, extension, resolution, note) below a
    header row.  With more than one format, the first row is tagged
    '(worst)' and the last one '(best)'.  An empty format list prints the
    header only.
    """
    def format_note(fdict):
        # Assemble the free-form "note" column from whichever codec,
        # bitrate and size fields are present.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if fdict.get('vcodec') is not None:
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += u'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # The first column is one character wider than the longest id so
        # that it never touches the extension column.
        template = u'%%-%ds%%-10s%%-12s%%s' % (idlen + 1)
        return template % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # Seed the list with the header width so that max() also works when
    # there are no formats at all (max() of an empty sequence raises).
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (u' ' if format_note(formats[0]) else u'') + u'(worst)'
        formats_s[-1] += (u' ' if format_note(formats[-1]) else u'') + u'(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
937
def urlopen(self, req):
    """Open req through this object's opener and return the result."""
    opener = self._opener
    return opener.open(req)
941
def print_debug_header(self):
    """Write version and environment details when 'verbose' is set.

    Outputs the youtube-dl version, the short git HEAD revision of the
    directory containing this file (when it can be determined), the
    Python version and platform, and the proxies of the opener's handlers.
    Does nothing unless the 'verbose' parameter is truthy.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Anchor the pattern so that only output consisting entirely of
        # hex digits is reported as a revision.
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git revision is best-effort information: ignore ordinary
        # failures (git missing, not a checkout, undecodable output), but
        # let KeyboardInterrupt and SystemExit propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() only exists on Python 2
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
968
def _setup_opener(self, timeout=20):
    """Create the URL opener, store it on self and install it globally.

    Reads the 'cookiefile', 'proxy' and 'nocheckcertificate' parameters.
    Sets self.cookiejar and self._opener, installs the opener as the
    default one and sets the default socket timeout to timeout.
    """
    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the cookie file when it is readable.
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty proxy option results in an empty proxy mapping.
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    skip_cert_check = self.params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(skip_cert_check)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)