]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Merge remote-tracking branch 'rupertbaxter2/master'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25 import ctypes
26
27 from .compat import (
28 compat_cookiejar,
29 compat_expanduser,
30 compat_http_client,
31 compat_kwargs,
32 compat_str,
33 compat_urllib_error,
34 compat_urllib_request,
35 )
36 from .utils import (
37 escape_url,
38 ContentTooShortError,
39 date_from_str,
40 DateRange,
41 DEFAULT_OUTTMPL,
42 determine_ext,
43 DownloadError,
44 encodeFilename,
45 ExtractorError,
46 format_bytes,
47 formatSeconds,
48 get_term_width,
49 locked_file,
50 make_HTTPS_handler,
51 MaxDownloadsReached,
52 PagedList,
53 parse_filesize,
54 PostProcessingError,
55 platform_name,
56 preferredencoding,
57 SameFileError,
58 sanitize_filename,
59 subtitles_filename,
60 takewhile_inclusive,
61 UnavailableVideoError,
62 url_basename,
63 version_tuple,
64 write_json_file,
65 write_string,
66 YoutubeDLHandler,
67 prepend_extension,
68 args_to_str,
69 age_restricted,
70 )
71 from .cache import Cache
72 from .extractor import get_info_extractor, gen_extractors
73 from .downloader import get_suitable_downloader
74 from .downloader.rtmp import rtmpdump_version
75 from .postprocessor import (
76 FFmpegFixupStretchedPP,
77 FFmpegMergerPP,
78 FFmpegPostProcessor,
79 get_postprocessor,
80 )
81 from .version import __version__
82
83
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * filename: The final filename
                       * status: One of "downloading" and "finished"

                       The dict may also have some of the following entries:

                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.


    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Class-level defaults; every one of these is rebound per-instance
    # in __init__, so the mutable lists here are never actually shared.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
240
241 def __init__(self, params=None, auto_init=True):
242 """Create a FileDownloader object with the given options."""
243 if params is None:
244 params = {}
245 self._ies = []
246 self._ies_instances = {}
247 self._pps = []
248 self._progress_hooks = []
249 self._download_retcode = 0
250 self._num_downloads = 0
251 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
252 self._err_file = sys.stderr
253 self.params = params
254 self.cache = Cache(self)
255
256 if params.get('bidi_workaround', False):
257 try:
258 import pty
259 master, slave = pty.openpty()
260 width = get_term_width()
261 if width is None:
262 width_args = []
263 else:
264 width_args = ['-w', str(width)]
265 sp_kwargs = dict(
266 stdin=subprocess.PIPE,
267 stdout=slave,
268 stderr=self._err_file)
269 try:
270 self._output_process = subprocess.Popen(
271 ['bidiv'] + width_args, **sp_kwargs
272 )
273 except OSError:
274 self._output_process = subprocess.Popen(
275 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
276 self._output_channel = os.fdopen(master, 'rb')
277 except OSError as ose:
278 if ose.errno == 2:
279 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
280 else:
281 raise
282
283 if (sys.version_info >= (3,) and sys.platform != 'win32' and
284 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
285 and not params.get('restrictfilenames', False)):
286 # On Python 3, the Unicode filesystem API will throw errors (#1474)
287 self.report_warning(
288 'Assuming --restrict-filenames since file system encoding '
289 'cannot encode all characters. '
290 'Set the LC_ALL environment variable to fix this.')
291 self.params['restrictfilenames'] = True
292
293 if '%(stitle)s' in self.params.get('outtmpl', ''):
294 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
295
296 self._setup_opener()
297
298 if auto_init:
299 self.print_debug_header()
300 self.add_default_info_extractors()
301
302 for pp_def_raw in self.params.get('postprocessors', []):
303 pp_class = get_postprocessor(pp_def_raw['key'])
304 pp_def = dict(pp_def_raw)
305 del pp_def['key']
306 pp = pp_class(self, **compat_kwargs(pp_def))
307 self.add_post_processor(pp)
308
309 for ph in self.params.get('progress_hooks', []):
310 self.add_progress_hook(ph)
311
312 def warn_if_short_id(self, argv):
313 # short YouTube ID starting with dash?
314 idxs = [
315 i for i, a in enumerate(argv)
316 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
317 if idxs:
318 correct_argv = (
319 ['youtube-dl'] +
320 [a for i, a in enumerate(argv) if i not in idxs] +
321 ['--'] + [argv[i] for i in idxs]
322 )
323 self.report_warning(
324 'Long argument string detected. '
325 'Use -- to separate parameters and URLs, like this:\n%s\n' %
326 args_to_str(correct_argv))
327
328 def add_info_extractor(self, ie):
329 """Add an InfoExtractor object to the end of the list."""
330 self._ies.append(ie)
331 self._ies_instances[ie.ie_key()] = ie
332 ie.set_downloader(self)
333
334 def get_info_extractor(self, ie_key):
335 """
336 Get an instance of an IE with name ie_key, it will try to get one from
337 the _ies list, if there's no instance it will create a new one and add
338 it to the extractor list.
339 """
340 ie = self._ies_instances.get(ie_key)
341 if ie is None:
342 ie = get_info_extractor(ie_key)()
343 self.add_info_extractor(ie)
344 return ie
345
346 def add_default_info_extractors(self):
347 """
348 Add the InfoExtractors returned by gen_extractors to the end of the list
349 """
350 for ie in gen_extractors():
351 self.add_info_extractor(ie)
352
353 def add_post_processor(self, pp):
354 """Add a PostProcessor object to the end of the chain."""
355 self._pps.append(pp)
356 pp.set_downloader(self)
357
358 def add_progress_hook(self, ph):
359 """Add the progress hook (currently only for the file downloader)"""
360 self._progress_hooks.append(ph)
361
362 def _bidi_workaround(self, message):
363 if not hasattr(self, '_output_channel'):
364 return message
365
366 assert hasattr(self, '_output_process')
367 assert isinstance(message, compat_str)
368 line_count = message.count('\n') + 1
369 self._output_process.stdin.write((message + '\n').encode('utf-8'))
370 self._output_process.stdin.flush()
371 res = ''.join(self._output_channel.readline().decode('utf-8')
372 for _ in range(line_count))
373 return res[:-len('\n')]
374
375 def to_screen(self, message, skip_eol=False):
376 """Print message to stdout if not in quiet mode."""
377 return self.to_stdout(message, skip_eol, check_quiet=True)
378
379 def _write_string(self, s, out=None):
380 write_string(s, out=out, encoding=self.params.get('encoding'))
381
382 def to_stdout(self, message, skip_eol=False, check_quiet=False):
383 """Print message to stdout if not in quiet mode."""
384 if self.params.get('logger'):
385 self.params['logger'].debug(message)
386 elif not check_quiet or not self.params.get('quiet', False):
387 message = self._bidi_workaround(message)
388 terminator = ['\n', ''][skip_eol]
389 output = message + terminator
390
391 self._write_string(output, self._screen_file)
392
393 def to_stderr(self, message):
394 """Print message to stderr."""
395 assert isinstance(message, compat_str)
396 if self.params.get('logger'):
397 self.params['logger'].error(message)
398 else:
399 message = self._bidi_workaround(message)
400 output = message + '\n'
401 self._write_string(output, self._err_file)
402
403 def to_console_title(self, message):
404 if not self.params.get('consoletitle', False):
405 return
406 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
407 # c_wchar_p() might not be necessary if `message` is
408 # already of type unicode()
409 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
410 elif 'TERM' in os.environ:
411 self._write_string('\033]0;%s\007' % message, self._screen_file)
412
413 def save_console_title(self):
414 if not self.params.get('consoletitle', False):
415 return
416 if 'TERM' in os.environ:
417 # Save the title on stack
418 self._write_string('\033[22;0t', self._screen_file)
419
420 def restore_console_title(self):
421 if not self.params.get('consoletitle', False):
422 return
423 if 'TERM' in os.environ:
424 # Restore the title from stack
425 self._write_string('\033[23;0t', self._screen_file)
426
427 def __enter__(self):
428 self.save_console_title()
429 return self
430
431 def __exit__(self, *args):
432 self.restore_console_title()
433
434 if self.params.get('cookiefile') is not None:
435 self.cookiejar.save()
436
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the original traceback carried by wrapper
                    # exceptions (e.g. ExtractorError.exc_info), then append
                    # the current one.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # No active exception: dump the current call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preserving the innermost exc_info
            # when the current exception wraps another one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
466
467 def report_warning(self, message):
468 '''
469 Print the message to stderr, it will be prefixed with 'WARNING:'
470 If stderr is a tty file the 'WARNING:' will be colored
471 '''
472 if self.params.get('logger') is not None:
473 self.params['logger'].warning(message)
474 else:
475 if self.params.get('no_warnings'):
476 return
477 if self._err_file.isatty() and os.name != 'nt':
478 _msg_header = '\033[0;33mWARNING:\033[0m'
479 else:
480 _msg_header = 'WARNING:'
481 warning_message = '%s %s' % (_msg_header, message)
482 self.to_stderr(warning_message)
483
484 def report_error(self, message, tb=None):
485 '''
486 Do the same as trouble, but prefixes the message with 'ERROR:', colored
487 in red if stderr is a tty file.
488 '''
489 if self._err_file.isatty() and os.name != 'nt':
490 _msg_header = '\033[0;31mERROR:\033[0m'
491 else:
492 _msg_header = 'ERROR:'
493 error_message = '%s %s' % (_msg_header, message)
494 self.trouble(error_message, tb)
495
496 def report_file_already_downloaded(self, file_name):
497 """Report file has already been fully downloaded."""
498 try:
499 self.to_screen('[download] %s has already been downloaded' % file_name)
500 except UnicodeEncodeError:
501 self.to_screen('[download] The file has already been downloaded')
502
503 def prepare_filename(self, info_dict):
504 """Generate the output filename."""
505 try:
506 template_dict = dict(info_dict)
507
508 template_dict['epoch'] = int(time.time())
509 autonumber_size = self.params.get('autonumber_size')
510 if autonumber_size is None:
511 autonumber_size = 5
512 autonumber_templ = '%0' + str(autonumber_size) + 'd'
513 template_dict['autonumber'] = autonumber_templ % self._num_downloads
514 if template_dict.get('playlist_index') is not None:
515 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
516 if template_dict.get('resolution') is None:
517 if template_dict.get('width') and template_dict.get('height'):
518 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
519 elif template_dict.get('height'):
520 template_dict['resolution'] = '%sp' % template_dict['height']
521 elif template_dict.get('width'):
522 template_dict['resolution'] = '?x%d' % template_dict['width']
523
524 sanitize = lambda k, v: sanitize_filename(
525 compat_str(v),
526 restricted=self.params.get('restrictfilenames'),
527 is_id=(k == 'id'))
528 template_dict = dict((k, sanitize(k, v))
529 for k, v in template_dict.items()
530 if v is not None)
531 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
532
533 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
534 tmpl = compat_expanduser(outtmpl)
535 filename = tmpl % template_dict
536 return filename
537 except ValueError as err:
538 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
539 return None
540
541 def _match_entry(self, info_dict):
542 """ Returns None iff the file should be downloaded """
543
544 video_title = info_dict.get('title', info_dict.get('id', 'video'))
545 if 'title' in info_dict:
546 # This can happen when we're just evaluating the playlist
547 title = info_dict['title']
548 matchtitle = self.params.get('matchtitle', False)
549 if matchtitle:
550 if not re.search(matchtitle, title, re.IGNORECASE):
551 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
552 rejecttitle = self.params.get('rejecttitle', False)
553 if rejecttitle:
554 if re.search(rejecttitle, title, re.IGNORECASE):
555 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
556 date = info_dict.get('upload_date', None)
557 if date is not None:
558 dateRange = self.params.get('daterange', DateRange())
559 if date not in dateRange:
560 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
561 view_count = info_dict.get('view_count', None)
562 if view_count is not None:
563 min_views = self.params.get('min_views')
564 if min_views is not None and view_count < min_views:
565 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
566 max_views = self.params.get('max_views')
567 if max_views is not None and view_count > max_views:
568 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
569 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
570 return 'Skipping "%s" because it is age restricted' % title
571 if self.in_download_archive(info_dict):
572 return '%s has already been recorded in archive' % video_title
573 return None
574
575 @staticmethod
576 def add_extra_info(info_dict, extra_info):
577 '''Set the keys from extra_info in info dict if they are missing'''
578 for key, value in extra_info.items():
579 info_dict.setdefault(key, value)
580
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # NOTE(review): extra_info has a mutable default shared across calls;
        # it appears to only be read and passed on here, but confirm no
        # callee mutates it.

        if ie_key:
            # An explicit extractor was requested; only try that one.
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor claimed the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
630
631 def add_default_extra_info(self, ie_result, ie, url):
632 self.add_extra_info(ie_result, {
633 'extractor': ie.IE_NAME,
634 'webpage_url': url,
635 'webpage_url_basename': url_basename(url),
636 'extractor_key': ie.ie_key(),
637 })
638
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): extra_info has a mutable default shared across calls;
        # it appears to only be read here, but confirm no callee mutates it.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # In flat-extraction mode, return the unresolved reference as-is.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the outer result take precedence over the
            # embedded page's, except for '_type' and 'url' themselves.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based internally.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily-paged playlists only fetch the requested slice.
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate bookkeeping fields into each legacy entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
773
774 def _apply_format_filter(self, format_spec, available_formats):
775 " Returns a tuple of the remaining format_spec and filtered formats "
776
777 OPERATORS = {
778 '<': operator.lt,
779 '<=': operator.le,
780 '>': operator.gt,
781 '>=': operator.ge,
782 '=': operator.eq,
783 '!=': operator.ne,
784 }
785 operator_rex = re.compile(r'''(?x)\s*\[
786 (?P<key>width|height|tbr|abr|vbr|filesize)
787 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
788 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
789 \]$
790 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
791 m = operator_rex.search(format_spec)
792 if not m:
793 raise ValueError('Invalid format specification %r' % format_spec)
794
795 try:
796 comparison_value = int(m.group('value'))
797 except ValueError:
798 comparison_value = parse_filesize(m.group('value'))
799 if comparison_value is None:
800 comparison_value = parse_filesize(m.group('value') + 'B')
801 if comparison_value is None:
802 raise ValueError(
803 'Invalid value %r in format specification %r' % (
804 m.group('value'), format_spec))
805 op = OPERATORS[m.group('op')]
806
807 def _filter(f):
808 actual_value = f.get(m.group('key'))
809 if actual_value is None:
810 return m.group('none_inclusive')
811 return op(actual_value, comparison_value)
812 new_formats = [f for f in available_formats if _filter(f)]
813
814 new_format_spec = format_spec[:-len(m.group(0))]
815 if not new_format_spec:
816 new_format_spec = 'best'
817
818 return (new_format_spec, new_formats)
819
820 def select_format(self, format_spec, available_formats):
821 while format_spec.endswith(']'):
822 format_spec, available_formats = self._apply_format_filter(
823 format_spec, available_formats)
824 if not available_formats:
825 return None
826
827 if format_spec == 'best' or format_spec is None:
828 return available_formats[-1]
829 elif format_spec == 'worst':
830 return available_formats[0]
831 elif format_spec == 'bestaudio':
832 audio_formats = [
833 f for f in available_formats
834 if f.get('vcodec') == 'none']
835 if audio_formats:
836 return audio_formats[-1]
837 elif format_spec == 'worstaudio':
838 audio_formats = [
839 f for f in available_formats
840 if f.get('vcodec') == 'none']
841 if audio_formats:
842 return audio_formats[0]
843 elif format_spec == 'bestvideo':
844 video_formats = [
845 f for f in available_formats
846 if f.get('acodec') == 'none']
847 if video_formats:
848 return video_formats[-1]
849 elif format_spec == 'worstvideo':
850 video_formats = [
851 f for f in available_formats
852 if f.get('acodec') == 'none']
853 if video_formats:
854 return video_formats[0]
855 else:
856 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
857 if format_spec in extensions:
858 filter_f = lambda f: f['ext'] == format_spec
859 else:
860 filter_f = lambda f: f['format_id'] == format_spec
861 matches = list(filter(filter_f, available_formats))
862 if matches:
863 return matches[-1]
864 return None
865
    def process_video_result(self, info_dict, download=True):
        """Normalize a single extracted video result and select formats.

        Fills in missing metadata (playlist fields, display_id, thumbnail,
        upload_date), validates and normalizes the 'formats' list, applies
        the user's format selection and, when `download` is True, passes
        each selected format to process_info(). Returns the info_dict,
        updated with the best selected format (backwards compatibility).

        Raises ExtractorError when mandatory fields are missing or no
        format matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            # Sort smallest-first so that [-1] is the largest thumbnail.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            for t in thumbnails:
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            # Default to the largest available thumbnail.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                # Fall back to the position in the list as an id.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Keep everything up to and including the limit format.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            # --list-formats: just print the table and stop here.
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # NOTE(review): 'ext' appears twice in this dict
                            # literal; the later `output_ext` entry wins.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': rf,
                                'ext': formats_info[0]['ext'],
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1019
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Performs all per-video side effects: filename preparation, forced
        stdout printing (--get-title & co.), writing of description,
        annotations, subtitles, info-json and thumbnail files, the actual
        download (possibly merging multiple requested formats), fixups and
        postprocessing, and recording in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        # Truncate overly long titles so the resulting filename stays sane.
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None reason means the video is filtered out (matchtitle,
        # date range, archive, ...) and must be skipped.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            # The actual JSON dump happens in download(), on the whole result.
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            # Create the target directory if needed.
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # No 'annotations' entry, or it is not writable as text.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                else:
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed thumbnail download is non-fatal.
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Run the downloader suited to this format's protocol.
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Multiple formats (e.g. '137+139'): download each to its
                    # own file, then merge with ffmpeg/avconv when available.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    fixup_policy = self.params.get('fixup')
                    if fixup_policy is None:
                        fixup_policy = 'detect_or_warn'
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy == 'ignore'

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
1248
1249 def download(self, url_list):
1250 """Download a given list of URLs."""
1251 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1252 if (len(url_list) > 1 and
1253 '%' not in outtmpl
1254 and self.params.get('max_downloads') != 1):
1255 raise SameFileError(outtmpl)
1256
1257 for url in url_list:
1258 try:
1259 # It also downloads the videos
1260 res = self.extract_info(url)
1261 except UnavailableVideoError:
1262 self.report_error('unable to download video')
1263 except MaxDownloadsReached:
1264 self.to_screen('[info] Maximum number of downloaded files reached.')
1265 raise
1266 else:
1267 if self.params.get('dump_single_json', False):
1268 self.to_stdout(json.dumps(res))
1269
1270 return self._download_retcode
1271
1272 def download_with_info_file(self, info_filename):
1273 with io.open(info_filename, 'r', encoding='utf-8') as f:
1274 info = json.load(f)
1275 try:
1276 self.process_ie_result(info, download=True)
1277 except DownloadError:
1278 webpage_url = info.get('webpage_url')
1279 if webpage_url is not None:
1280 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1281 return self.download([webpage_url])
1282 else:
1283 raise
1284 return self._download_retcode
1285
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Runs any per-video postprocessors stored under '__postprocessors'
        first, then the globally registered ones (self._pps). Each
        postprocessor may replace the info dict (e.g. change 'filepath');
        the original file is deleted afterwards unless a postprocessor or
        the --keep-video option asked to keep it.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        pps_chain = []
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # keep_video is re-evaluated for every postprocessor.
            keep_video = None
            old_filename = info['filepath']
            try:
                # pp.run returns (keep_video_wish, possibly-updated info).
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A positive wish always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
            if keep_video is False and not self.params.get('keepvideo', False):
                try:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded video file')
1313
1314 def _make_archive_id(self, info_dict):
1315 # Future-proof against any change in case
1316 # and backwards compatibility with prior versions
1317 extractor = info_dict.get('extractor_key')
1318 if extractor is None:
1319 if 'id' in info_dict:
1320 extractor = info_dict.get('ie_key') # key in a playlist
1321 if extractor is None:
1322 return None # Incomplete video information
1323 return extractor.lower() + ' ' + info_dict['id']
1324
1325 def in_download_archive(self, info_dict):
1326 fn = self.params.get('download_archive')
1327 if fn is None:
1328 return False
1329
1330 vid_id = self._make_archive_id(info_dict)
1331 if vid_id is None:
1332 return False # Incomplete video information
1333
1334 try:
1335 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1336 for line in archive_file:
1337 if line.strip() == vid_id:
1338 return True
1339 except IOError as ioe:
1340 if ioe.errno != errno.ENOENT:
1341 raise
1342 return False
1343
1344 def record_download_archive(self, info_dict):
1345 fn = self.params.get('download_archive')
1346 if fn is None:
1347 return
1348 vid_id = self._make_archive_id(info_dict)
1349 assert vid_id
1350 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1351 archive_file.write(vid_id + '\n')
1352
1353 @staticmethod
1354 def format_resolution(format, default='unknown'):
1355 if format.get('vcodec') == 'none':
1356 return 'audio only'
1357 if format.get('resolution') is not None:
1358 return format['resolution']
1359 if format.get('height') is not None:
1360 if format.get('width') is not None:
1361 res = '%sx%s' % (format['width'], format['height'])
1362 else:
1363 res = '%sp' % format['height']
1364 elif format.get('width') is not None:
1365 res = '?x%d' % format['width']
1366 else:
1367 res = default
1368 return res
1369
1370 def _format_note(self, fdict):
1371 res = ''
1372 if fdict.get('ext') in ['f4f', 'f4m']:
1373 res += '(unsupported) '
1374 if fdict.get('format_note') is not None:
1375 res += fdict['format_note'] + ' '
1376 if fdict.get('tbr') is not None:
1377 res += '%4dk ' % fdict['tbr']
1378 if fdict.get('container') is not None:
1379 if res:
1380 res += ', '
1381 res += '%s container' % fdict['container']
1382 if (fdict.get('vcodec') is not None and
1383 fdict.get('vcodec') != 'none'):
1384 if res:
1385 res += ', '
1386 res += fdict['vcodec']
1387 if fdict.get('vbr') is not None:
1388 res += '@'
1389 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1390 res += 'video@'
1391 if fdict.get('vbr') is not None:
1392 res += '%4dk' % fdict['vbr']
1393 if fdict.get('fps') is not None:
1394 res += ', %sfps' % fdict['fps']
1395 if fdict.get('acodec') is not None:
1396 if res:
1397 res += ', '
1398 if fdict['acodec'] == 'none':
1399 res += 'video only'
1400 else:
1401 res += '%-5s' % fdict['acodec']
1402 elif fdict.get('abr') is not None:
1403 if res:
1404 res += ', '
1405 res += 'audio'
1406 if fdict.get('abr') is not None:
1407 res += '@%3dk' % fdict['abr']
1408 if fdict.get('asr') is not None:
1409 res += ' (%5dHz)' % fdict['asr']
1410 if fdict.get('filesize') is not None:
1411 if res:
1412 res += ', '
1413 res += format_bytes(fdict['filesize'])
1414 elif fdict.get('filesize_approx') is not None:
1415 if res:
1416 res += ', '
1417 res += '~' + format_bytes(fdict['filesize_approx'])
1418 return res
1419
1420 def list_formats(self, info_dict):
1421 def line(format, idlen=20):
1422 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1423 format['format_id'],
1424 format['ext'],
1425 self.format_resolution(format),
1426 self._format_note(format),
1427 ))
1428
1429 formats = info_dict.get('formats', [info_dict])
1430 idlen = max(len('format code'),
1431 max(len(f['format_id']) for f in formats))
1432 formats_s = [
1433 line(f, idlen) for f in formats
1434 if f.get('preference') is None or f['preference'] >= -1000]
1435 if len(formats) > 1:
1436 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1437 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1438
1439 header_line = line({
1440 'format_id': 'format code', 'ext': 'extension',
1441 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1442 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1443 (info_dict['id'], header_line, '\n'.join(formats_s)))
1444
    def urlopen(self, req):
        """ Start an HTTP download.

        `req` may be either a URL string or a Request object; it is passed
        to the opener built by _setup_opener(), after percent-encoding any
        non-ASCII characters in the URL.
        """

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # NOTE: on Python 3 the `basestring` name is never evaluated thanks
        # to short-circuiting of the conditional expression.
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            if req_is_string:
                req = url_escaped
            else:
                # Rebuild the Request around the escaped URL, preserving
                # payload, headers and origin information.
                req = compat_urllib_request.Request(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
1468
1469 def print_debug_header(self):
1470 if not self.params.get('verbose'):
1471 return
1472
1473 if type('') is not compat_str:
1474 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1475 self.report_warning(
1476 'Your Python is broken! Update to a newer and supported version')
1477
1478 stdout_encoding = getattr(
1479 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1480 encoding_str = (
1481 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1482 locale.getpreferredencoding(),
1483 sys.getfilesystemencoding(),
1484 stdout_encoding,
1485 self.get_encoding()))
1486 write_string(encoding_str, encoding=None)
1487
1488 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1489 try:
1490 sp = subprocess.Popen(
1491 ['git', 'rev-parse', '--short', 'HEAD'],
1492 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1493 cwd=os.path.dirname(os.path.abspath(__file__)))
1494 out, err = sp.communicate()
1495 out = out.decode().strip()
1496 if re.match('[0-9a-f]+', out):
1497 self._write_string('[debug] Git HEAD: ' + out + '\n')
1498 except:
1499 try:
1500 sys.exc_clear()
1501 except:
1502 pass
1503 self._write_string('[debug] Python version %s - %s\n' % (
1504 platform.python_version(), platform_name()))
1505
1506 exe_versions = FFmpegPostProcessor.get_versions()
1507 exe_versions['rtmpdump'] = rtmpdump_version()
1508 exe_str = ', '.join(
1509 '%s %s' % (exe, v)
1510 for exe, v in sorted(exe_versions.items())
1511 if v
1512 )
1513 if not exe_str:
1514 exe_str = 'none'
1515 self._write_string('[debug] exe versions: %s\n' % exe_str)
1516
1517 proxy_map = {}
1518 for handler in self._opener.handlers:
1519 if hasattr(handler, 'proxies'):
1520 proxy_map.update(handler.proxies)
1521 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1522
1523 if self.params.get('call_home', False):
1524 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1525 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1526 latest_version = self.urlopen(
1527 'https://yt-dl.org/latest/version').read().decode('utf-8')
1528 if version_tuple(latest_version) > version_tuple(__version__):
1529 self.report_warning(
1530 'You are using an outdated version (newest version: %s)! '
1531 'See https://yt-dl.org/update if you need help updating.' %
1532 latest_version)
1533
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handling) used
        by urlopen() and store it as self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 10-minute socket timeout.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Load existing cookies only when the file is readable; a new
            # file will be created on save otherwise.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicitly empty --proxy disables all proxies.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1573
1574 def encode(self, s):
1575 if isinstance(s, bytes):
1576 return s # Already encoded
1577
1578 try:
1579 return s.encode(self.get_encoding())
1580 except UnicodeEncodeError as err:
1581 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1582 raise
1583
1584 def get_encoding(self):
1585 encoding = self.params.get('encoding')
1586 if encoding is None:
1587 encoding = preferredencoding()
1588 return encoding