]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[YoutubeDL] Add --playlist-items option (Fixes #2662)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25 import ctypes
26
27 from .compat import (
28 compat_cookiejar,
29 compat_expanduser,
30 compat_http_client,
31 compat_kwargs,
32 compat_str,
33 compat_urllib_error,
34 compat_urllib_request,
35 )
36 from .utils import (
37 escape_url,
38 ContentTooShortError,
39 date_from_str,
40 DateRange,
41 DEFAULT_OUTTMPL,
42 determine_ext,
43 DownloadError,
44 encodeFilename,
45 ExtractorError,
46 format_bytes,
47 formatSeconds,
48 get_term_width,
49 locked_file,
50 make_HTTPS_handler,
51 MaxDownloadsReached,
52 PagedList,
53 parse_filesize,
54 PostProcessingError,
55 platform_name,
56 preferredencoding,
57 render_table,
58 SameFileError,
59 sanitize_filename,
60 std_headers,
61 subtitles_filename,
62 takewhile_inclusive,
63 UnavailableVideoError,
64 url_basename,
65 version_tuple,
66 write_json_file,
67 write_string,
68 YoutubeDLHandler,
69 prepend_extension,
70 args_to_str,
71 age_restricted,
72 )
73 from .cache import Cache
74 from .extractor import get_info_extractor, gen_extractors
75 from .downloader import get_suitable_downloader
76 from .downloader.rtmp import rtmpdump_version
77 from .postprocessor import (
78 FFmpegFixupM4aPP,
79 FFmpegFixupStretchedPP,
80 FFmpegMergerPP,
81 FFmpegPostProcessor,
82 get_postprocessor,
83 )
84 from .version import __version__
85
86
87 class YoutubeDL(object):
88 """YoutubeDL class.
89
90 YoutubeDL objects are the ones responsible for downloading the
91 actual video file and writing it to disk if the user has requested
92 it, among some other tasks. In most cases there should be one per
93 program. As, given a video URL, the downloader doesn't know how to
94 extract all the needed information, task that InfoExtractors do, it
95 has to pass the URL to one of them.
96
97 For this, YoutubeDL objects have a method that allows
98 InfoExtractors to be registered in a given order. When it is passed
99 a URL, the YoutubeDL object hands it to the first InfoExtractor it
100 finds that reports being able to handle it. The InfoExtractor extracts
101 all the information about the video or videos the URL refers to, and
102 YoutubeDL processes the extracted information, possibly using a File
103 Downloader to download the video.
104
105 YoutubeDL objects accept a lot of parameters. In order not to saturate
106 the object constructor with arguments, it receives a dictionary of
107 options instead. These options are available through the params
108 attribute for the InfoExtractors to use. The YoutubeDL also
109 registers itself as the downloader in charge for the InfoExtractors
110 that are added to it, so this is a "mutual registration".
111
112 Available options:
113
114 username: Username for authentication purposes.
115 password: Password for authentication purposes.
116 videopassword: Password for accessing a video.
117 usenetrc: Use netrc for authentication instead.
118 verbose: Print additional info to stdout.
119 quiet: Do not print messages to stdout.
120 no_warnings: Do not print out anything for warnings.
121 forceurl: Force printing final URL.
122 forcetitle: Force printing title.
123 forceid: Force printing ID.
124 forcethumbnail: Force printing thumbnail URL.
125 forcedescription: Force printing description.
126 forcefilename: Force printing final filename.
127 forceduration: Force printing duration.
128 forcejson: Force printing info_dict as JSON.
129 dump_single_json: Force printing the info_dict of the whole playlist
130 (or video) as a single JSON line.
131 simulate: Do not download the video files.
132 format: Video format code. See options.py for more information.
133 format_limit: Highest quality format to try.
134 outtmpl: Template for output names.
135 restrictfilenames: Do not allow "&" and spaces in file names
136 ignoreerrors: Do not stop on download errors.
137 nooverwrites: Prevent overwriting files.
138 playliststart: Playlist item to start at.
139 playlistend: Playlist item to end at.
140 playlist_items: Specific indices of playlist to download.
141 playlistreverse: Download playlist items in reverse order.
142 matchtitle: Download only matching titles.
143 rejecttitle: Reject downloads for matching titles.
144 logger: Log messages to a logging.Logger instance.
145 logtostderr: Log messages to stderr instead of stdout.
146 writedescription: Write the video description to a .description file
147 writeinfojson: Write the video description to a .info.json file
148 writeannotations: Write the video annotations to a .annotations.xml file
149 writethumbnail: Write the thumbnail image to a file
150 write_all_thumbnails: Write all thumbnail formats to files
151 writesubtitles: Write the video subtitles to a file
152 writeautomaticsub: Write the automatic subtitles to a file
153 allsubtitles: Downloads all the subtitles of the video
154 (requires writesubtitles or writeautomaticsub)
155 listsubtitles: Lists all available subtitles for the video
156 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
157 subtitleslangs: List of languages of the subtitles to download
158 keepvideo: Keep the video file after post-processing
159 daterange: A DateRange object, download only if the upload_date is in the range.
160 skip_download: Skip the actual download of the video file
161 cachedir: Location of the cache files in the filesystem.
162 False to disable filesystem cache.
163 noplaylist: Download single video instead of a playlist if in doubt.
164 age_limit: An integer representing the user's age in years.
165 Unsuitable videos for the given age are skipped.
166 min_views: An integer representing the minimum view count the video
167 must have in order to not be skipped.
168 Videos without view count information are always
169 downloaded. None for no limit.
170 max_views: An integer representing the maximum view count.
171 Videos that are more popular than that are not
172 downloaded.
173 Videos without view count information are always
174 downloaded. None for no limit.
175 download_archive: File name of a file where all downloads are recorded.
176 Videos already present in the file are not downloaded
177 again.
178 cookiefile: File name where cookies should be read from and dumped to.
179 nocheckcertificate:Do not verify SSL certificates
180 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
181 At the moment, this is only supported by YouTube.
182 proxy: URL of the proxy server to use
183 socket_timeout: Time to wait for unresponsive hosts, in seconds
184 bidi_workaround: Work around buggy terminals without bidirectional text
185 support, using fribidi
186 debug_printtraffic:Print out sent and received HTTP traffic
187 include_ads: Download ads as well
188 default_search: Prepend this string if an input url is not valid.
189 'auto' for elaborate guessing
190 encoding: Use this encoding instead of the system-specified.
191 extract_flat: Do not resolve URLs, return the immediate result.
192 Pass in 'in_playlist' to only show this behavior for
193 playlist items.
194 postprocessors: A list of dictionaries, each with an entry
195 * key: The name of the postprocessor. See
196 youtube_dl/postprocessor/__init__.py for a list.
197 as well as any further keyword arguments for the
198 postprocessor.
199 progress_hooks: A list of functions that get called on download
200 progress, with a dictionary with the entries
201 * filename: The final filename
202 * status: One of "downloading" and "finished"
203
204 The dict may also have some of the following entries:
205
206 * downloaded_bytes: Bytes on disk
207 * total_bytes: Size of the whole file, None if unknown
208 * tmpfilename: The filename we're currently writing to
209 * eta: The estimated time in seconds, None if unknown
210 * speed: The download speed in bytes/second, None if
211 unknown
212
213 Progress hooks are guaranteed to be called at least once
214 (with status "finished") if the download is successful.
215 merge_output_format: Extension to use when merging formats.
216 fixup: Automatically correct known faults of the file.
217 One of:
218 - "never": do nothing
219 - "warn": only emit a warning
220 - "detect_or_warn": check whether we can do anything
221 about it, warn otherwise (default)
222 source_address: (Experimental) Client-side IP address to bind to.
223 call_home: Boolean, true iff we are allowed to contact the
224 youtube-dl servers for debugging.
225 sleep_interval: Number of seconds to sleep before each download.
226 external_downloader: Executable of the external downloader to call.
227 listformats: Print an overview of available video formats and exit.
228 list_thumbnails: Print a table of all thumbnails and exit.
229
230
231 The following parameters are not used by YoutubeDL itself, they are used by
232 the FileDownloader:
233 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
234 noresizebuffer, retries, continuedl, noprogress, consoletitle
235
236 The following options are used by the post processors:
237 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
238 otherwise prefer avconv.
239 exec_cmd: Arbitrary command to run after downloading
240 """
241
242 params = None
243 _ies = []
244 _pps = []
245 _download_retcode = None
246 _num_downloads = None
247 _screen_file = None
248
def __init__(self, params=None, auto_init=True):
    """Set up a downloader configured by the *params* option dictionary.

    params defaults to a fresh empty dict. When auto_init is true, the
    debug header is printed and the default InfoExtractors are registered.
    Post processors and progress hooks listed in params are registered in
    all cases.
    """
    if params is None:
        params = {}

    # Per-instance state (shadows the class-level placeholders).
    self.params = params
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._err_file = sys.stderr
    # 'logtostderr' is used as a list index: False -> stdout, True -> stderr.
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master_fd, slave_fd = pty.openpty()
            term_width = get_term_width()
            width_args = [] if term_width is None else ['-w', str(term_width)]
            popen_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave_fd,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi when it cannot be
            # started.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **popen_kwargs)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **popen_kwargs)
            self._output_channel = os.fdopen(master_fd, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning(
                'Could not find fribidi executable, ignoring --bidi-workaround . '
                'Make sure that fribidi is an executable file in one of the '
                'directories in your $PATH.')

    ascii_only_filesystem = (
        sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
        not params.get('restrictfilenames', False))
    if ascii_only_filesystem:
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(
            '%(stitle)s is deprecated. Use the %(title)s and the '
            '--restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_spec in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_spec['key'])
        # Everything except 'key' is forwarded to the constructor.
        pp_kwargs = dict(pp_spec)
        del pp_kwargs['key']
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_kwargs)))

    for hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(hook)
319
def warn_if_short_id(self, argv):
    """Warn when argv holds tokens that look like a dash-led video ID.

    A token matches when it is a dash followed by exactly ten characters
    from [0-9A-Za-z_-]. The warning suggests a command line with those
    tokens moved after a '--' separator.
    """
    dash_ids = []
    other_args = []
    for arg in argv:
        # short YouTube ID starting with dash?
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            dash_ids.append(arg)
        else:
            other_args.append(arg)
    if not dash_ids:
        return

    suggested_argv = ['youtube-dl'] + other_args + ['--'] + dash_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggested_argv))
335
def add_info_extractor(self, ie):
    """Append ie to the extractor list and index it by its ie_key().

    The extractor is also told that this object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
341
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, one is created from the class
    returned by the module-level get_info_extractor(), added to the
    extractor list and returned.
    """
    registered = self._ies_instances.get(ie_key)
    if registered is not None:
        return registered

    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
353
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
360
def add_post_processor(self, pp):
    """Append pp to the post processor chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
365
def add_progress_hook(self, ph):
    """Append the progress hook ph to the list of registered hooks."""
    hooks = self._progress_hooks
    hooks.append(ph)
369
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Without an _output_channel attribute the message is returned as is.
    Otherwise the message is written to the helper's stdin and one output
    line is read back for every line of the message.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    decoded = []
    for _ in range(expected_lines):
        decoded.append(self._output_channel.readline().decode('utf-8'))
    # Drop the final character, i.e. the newline added before writing.
    return ''.join(decoded)[:-len('\n')]
382
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout() with the quiet check enabled."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
386
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' option."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
389
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the configured logger or to the screen file.

    A truthy 'logger' option receives the message through debug().
    Otherwise the message is written to the screen file, unless
    check_quiet is set and the 'quiet' option is enabled. A newline is
    appended unless skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    # skip_eol is used as an index: False -> newline, True -> nothing.
    line_end = ['\n', ''][skip_eol]
    text = self._bidi_workaround(message) + line_end
    self._write_string(text, self._screen_file)
400
def to_stderr(self, message):
    """Send message to the logger's error() or, without a logger, to the error file."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return

    text = self._bidi_workaround(message) + '\n'
    self._write_string(text, self._err_file)
410
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on.

    On Windows with a console window the kernel32 SetConsoleTitleW call is
    used; otherwise, when TERM is set, an escape sequence is written to the
    screen file.
    """
    if not self.params.get('consoletitle', False):
        return

    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return

    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
420
def save_console_title(self):
    """Write the 'save title' escape sequence when 'consoletitle' is on and TERM is set."""
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
427
def restore_console_title(self):
    """Write the 'restore title' escape sequence when 'consoletitle' is on and TERM is set."""
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
434
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    downloader = self
    return downloader
438
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is saved only when the 'cookiefile' option is not None.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
444
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    The message, if any, is printed to stderr. In verbose mode a
    traceback is printed as well: tb when given, otherwise one built from
    the exception being handled or, failing that, from the current stack.

    Unless the 'ignoreerrors' option is set, DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                pieces = ['']
                # Some exceptions carry the exc_info of their cause.
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.append(''.join(
                        traceback.format_exception(*exc_value.exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                stack_lines = traceback.format_list(traceback.extract_stack())
                tb = ''.join(stack_lines)
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
474
def report_warning(self, message):
    '''
    Emit a warning.

    With a 'logger' option the message goes to its warning() method.
    Otherwise, unless 'no_warnings' is set, it is printed to stderr with a
    'WARNING:' prefix, colored when the error file is a tty and the
    platform is not Windows.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
491
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed by 'ERROR:'.

    The prefix is colored red when the error file is a tty and the
    platform is not Windows.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
503
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded.

    Falls back to a message without the file name when printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
510
def prepare_filename(self, info_dict):
    """Expand the 'outtmpl' option with the fields of info_dict.

    Returns the resulting filename. When the expansion raises ValueError
    the error is reported and None is returned.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            # Zero-pad the index to the width of the entry count.
            pad_width = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad_width, fields['playlist_index'])

        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        # Sanitize every value that is set; None values are dropped so that
        # the template falls back to 'NA' for them.
        sanitized = {}
        for key, value in fields.items():
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == 'id'))
        lookup = collections.defaultdict(lambda: 'NA', sanitized)

        template = compat_expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        return template % lookup
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
548
def _match_entry(self, info_dict):
    """Return None iff the file should be downloaded.

    Otherwise return a string explaining why the entry is skipped. The
    checks, in order: 'matchtitle' / 'rejecttitle' patterns (only when the
    entry has a title), 'daterange', 'min_views' / 'max_views',
    'age_limit' and the download archive.
    """

    # Best available label for messages; the title may be missing.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        # Use video_title: `title` is only bound when the entry has a
        # 'title' key, so using it here failed for title-less entries.
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
582
583 @staticmethod
584 def add_extra_info(info_dict, extra_info):
585 '''Set the keys from extra_info in info dict if they are missing'''
586 for key, value in extra_info.items():
587 info_dict.setdefault(key, value)
588
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    url        -- the URL to extract.
    download   -- passed on to process_ie_result(); also download if true.
    ie_key     -- when given, only the extractor with this key is tried.
    extra_info -- dict with extra values to add to each result
                  (defaults to an empty dict).
    process    -- when false, the extractor result is returned without
                  being passed through process_ie_result().

    Returns the (processed) extractor result. Returns None when no
    extractor is suitable, when the extractor returns None, or when an
    error was reported instead of raised.
    '''
    # A fresh dict per call; the default must not be a shared mutable.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
638
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields of ie_result where missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
646
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    ie_result  -- the extractor result dict; its '_type' (default 'video')
                  selects how it is handled.
    download   -- also download the videos if true.
    extra_info -- dict with extra values to add to the results
                  (defaults to an empty dict).

    Returns the resolved ie_result. For 'url' and 'url_transparent'
    results this is whatever extract_info() produced, which may be None.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call; the default must not be a shared mutable.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info() returns None when extraction failed and the error
        # was only reported; return early instead of crashing on .copy().
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # Yield the 1-based indices named by a spec such as
                # '1,3,5-7' ('a-b' ranges are inclusive).
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def select_playlistitems(entry_list):
            # Pick the requested 1-based indices, skipping any that fall
            # outside the list: index 0 would otherwise wrap around to the
            # last entry and too large an index would raise IndexError.
            n_available = len(entry_list)
            return [
                entry_list[i - 1] for i in playlistitems
                if 1 <= i <= n_available]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems is not None:
                entries = select_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems is not None:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems is not None:
                entries = select_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each legacy entry the extractor fields of its container.
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
808
def _apply_format_filter(self, format_spec, available_formats):
    """Apply the trailing '[key op value]' filter of format_spec.

    Returns a tuple of the remaining format_spec (with the filter removed,
    or 'best' when nothing remains) and the list of formats that pass the
    filter. Raises ValueError when the filter or its value cannot be
    parsed.
    """

    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*\[
        (?P<key>width|height|tbr|abr|vbr|filesize)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        \]$
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    match = operator_rex.search(format_spec)
    if not match:
        raise ValueError('Invalid format specification %r' % format_spec)

    raw_value = match.group('value')
    try:
        comparison_value = int(raw_value)
    except ValueError:
        # Not a plain integer: try it as a file size, then with a 'B'
        # suffix appended.
        comparison_value = parse_filesize(raw_value)
        if comparison_value is None:
            comparison_value = parse_filesize(raw_value + 'B')
        if comparison_value is None:
            raise ValueError(
                'Invalid value %r in format specification %r' % (
                    raw_value, format_spec))

    compare = OPERATORS[match.group('op')]
    field = match.group('key')
    keep_unknown = match.group('none_inclusive')

    filtered = []
    for fmt in available_formats:
        actual_value = fmt.get(field)
        if actual_value is None:
            # Formats lacking the field pass only with the '?' marker.
            if keep_unknown:
                filtered.append(fmt)
        elif compare(actual_value, comparison_value):
            filtered.append(fmt)

    remaining_spec = format_spec[:-len(match.group(0))]
    if not remaining_spec:
        remaining_spec = 'best'

    return (remaining_spec, filtered)
854
def select_format(self, format_spec, available_formats):
    """Pick one format from available_formats according to format_spec.

    format_spec may be None or 'best' (last format), 'worst' (first
    format), 'bestaudio' / 'worstaudio' (formats whose vcodec is 'none'),
    'bestvideo' / 'worstvideo' (formats whose acodec is 'none'), a known
    file extension, or a format_id. Trailing '[...]' filters are applied
    first through _apply_format_filter().

    Returns the selected format dict, or None when nothing matches.
    """
    # Guard against None: the 'best' fallback below accepts it, but
    # calling .endswith() on None would raise before getting there.
    while format_spec is not None and format_spec.endswith(']'):
        format_spec, available_formats = self._apply_format_filter(
            format_spec, available_formats)
    if not available_formats:
        return None

    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # spec -> (codec field that must be 'none', index of the pick)
    codec_selectors = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_selectors:
        codec_field, pick = codec_selectors[format_spec]
        candidates = [
            f for f in available_formats
            if f.get(codec_field) == 'none']
        return candidates[pick] if candidates else None

    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
    if format_spec in extensions:
        matches = [f for f in available_formats if f['ext'] == format_spec]
    else:
        matches = [f for f in available_formats if f['format_id'] == format_spec]
    return matches[-1] if matches else None
900
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for info_dict as a new dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    when present, and sets 'Cookie' when self._calc_cookies() yields a
    value for this entry.
    """
    headers = std_headers.copy()

    # An absent or empty 'http_headers' entry contributes nothing
    headers.update(info_dict.get('http_headers') or {})

    cookie_value = self._calc_cookies(info_dict)
    if cookie_value:
        headers['Cookie'] = cookie_value
    return headers
913
914 def _calc_cookies(self, info_dict):
915 class _PseudoRequest(object):
916 def __init__(self, url):
917 self.url = url
918 self.headers = {}
919 self.unverifiable = False
920
921 def add_unredirected_header(self, k, v):
922 self.headers[k] = v
923
924 def get_full_url(self):
925 return self.url
926
927 def is_unverifiable(self):
928 return self.unverifiable
929
930 def has_header(self, h):
931 return h in self.headers
932
933 pr = _PseudoRequest(info_dict['url'])
934 self.cookiejar.add_cookie_header(pr)
935 return pr.headers.get('Cookie')
936
def process_video_result(self, info_dict, download=True):
    """Normalise a single-video result, select the requested formats and
    optionally hand each of them to process_info().

    info_dict is modified in place: playlist fields, thumbnail data,
    display_id, upload_date and per-format defaults (format_id, format,
    ext, http_headers) are filled in when missing.

    Returns info_dict (updated with the last selected format), or None
    when only a listing was requested ('listformats', 'list_thumbnails')
    or when a merge request names an audio-only format first.

    Raises ExtractorError if mandatory fields are missing, no formats are
    found, or none of the requested formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def _thumbnail_sort_key(t):
            # Pair every field with an "is set" flag so that missing values
            # sort first and None is never ordered against int/str, which
            # raises TypeError on Python 3.
            return tuple(
                (t.get(field) is not None, t.get(field))
                for field in ('preference', 'width', 'height', 'id', 'url'))

        thumbnails.sort(key=_thumbnail_sort_key)
        for t in thumbnails:
            # The keys may be present but hold None; '%d' needs real numbers
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # Only set the 'formats' field if the original info_dict lists them;
        # otherwise we end up with a circular reference (the first and only
        # element of 'formats' would be info_dict itself), which can't be
        # exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        output_ext = (
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        # 'ext' is listed once only: the merged file uses
                        # output_ext, not the video stream's own extension
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': output_ext,
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'stretched_ratio': formats_info[0].get('stretched_ratio'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1103
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the max_downloads limit and the _match_entry() filter, prints
    the fields requested through the 'force*' params, writes the optional
    side files (description, annotations, subtitles, info JSON,
    thumbnails), downloads the media unless 'skip_download' is set, queues
    fixup postprocessors, runs post-processing and records the entry in
    the download archive.

    Returns None.  Raises MaxDownloadsReached when the limit is hit and
    UnavailableVideoError when the download fails with OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif info_dict.get('annotations') is None:
            # Check before opening the file: opening it for writing first
            # would leave an empty .annotations.xml behind.
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Run the downloader chosen for this info dict, with all
                # registered progress hooks attached
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)
            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                if not merger._executable:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged')
                else:
                    postprocessors = [merger]
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    new_info.update(f)
                    fname = self.prepare_filename(new_info)
                    # Each part gets its own file name, tagged with its format id
                    fname = prepend_extension(fname, 'f%s' % f['format_id'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
1331
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output template (no '%' field) and max_downloads is not 1.
    Returns self._download_retcode.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # extract_info() also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1354
def download_with_info_file(self, info_filename):
    """Process the info dict stored as UTF-8 JSON in info_filename.

    If processing raises DownloadError and the info has a 'webpage_url',
    a warning is reported and that URL is downloaded instead; without a
    'webpage_url' the error is re-raised.  Returns the download retcode.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1368
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed in ie_info['__postprocessors'] run first,
    followed by those in self._pps.  Each one receives the info dict
    returned by the previous one.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    chain = []
    if ie_info.get('__postprocessors') is not None:
        chain.extend(ie_info['__postprocessors'])
    chain.extend(self._pps)

    for pp in chain:
        path_before = info['filepath']
        # The decision is taken afresh for every postprocessor: it is
        # whatever run() returned, or None if run() failed.
        keep_wish = None
        try:
            keep_wish, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)

        # Only an explicit False asks for the input file to be deleted
        if keep_wish is not False or self.params.get('keepvideo', False):
            continue
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % path_before)
            os.remove(encodeFilename(path_before))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1396
1397 def _make_archive_id(self, info_dict):
1398 # Future-proof against any change in case
1399 # and backwards compatibility with prior versions
1400 extractor = info_dict.get('extractor_key')
1401 if extractor is None:
1402 if 'id' in info_dict:
1403 extractor = info_dict.get('ie_key') # key in a playlist
1404 if extractor is None:
1405 return None # Incomplete video information
1406 return extractor.lower() + ' ' + info_dict['id']
1407
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is a line of the download archive.

    Returns False when no 'download_archive' param is set, when the
    archive id cannot be built, or when the archive file does not exist.
    Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(line.strip() == archive_id for line in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1426
def record_download_archive(self, info_dict):
    """Append info_dict's archive id as a new line to the download archive.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
1435
1436 @staticmethod
1437 def format_resolution(format, default='unknown'):
1438 if format.get('vcodec') == 'none':
1439 return 'audio only'
1440 if format.get('resolution') is not None:
1441 return format['resolution']
1442 if format.get('height') is not None:
1443 if format.get('width') is not None:
1444 res = '%sx%s' % (format['width'], format['height'])
1445 else:
1446 res = '%sp' % format['height']
1447 elif format.get('width') is not None:
1448 res = '?x%d' % format['width']
1449 else:
1450 res = default
1451 return res
1452
1453 def _format_note(self, fdict):
1454 res = ''
1455 if fdict.get('ext') in ['f4f', 'f4m']:
1456 res += '(unsupported) '
1457 if fdict.get('format_note') is not None:
1458 res += fdict['format_note'] + ' '
1459 if fdict.get('tbr') is not None:
1460 res += '%4dk ' % fdict['tbr']
1461 if fdict.get('container') is not None:
1462 if res:
1463 res += ', '
1464 res += '%s container' % fdict['container']
1465 if (fdict.get('vcodec') is not None and
1466 fdict.get('vcodec') != 'none'):
1467 if res:
1468 res += ', '
1469 res += fdict['vcodec']
1470 if fdict.get('vbr') is not None:
1471 res += '@'
1472 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1473 res += 'video@'
1474 if fdict.get('vbr') is not None:
1475 res += '%4dk' % fdict['vbr']
1476 if fdict.get('fps') is not None:
1477 res += ', %sfps' % fdict['fps']
1478 if fdict.get('acodec') is not None:
1479 if res:
1480 res += ', '
1481 if fdict['acodec'] == 'none':
1482 res += 'video only'
1483 else:
1484 res += '%-5s' % fdict['acodec']
1485 elif fdict.get('abr') is not None:
1486 if res:
1487 res += ', '
1488 res += 'audio'
1489 if fdict.get('abr') is not None:
1490 res += '@%3dk' % fdict['abr']
1491 if fdict.get('asr') is not None:
1492 res += ' (%5dHz)' % fdict['asr']
1493 if fdict.get('filesize') is not None:
1494 if res:
1495 res += ', '
1496 res += format_bytes(fdict['filesize'])
1497 elif fdict.get('filesize_approx') is not None:
1498 if res:
1499 res += ', '
1500 res += '~' + format_bytes(fdict['filesize_approx'])
1501 return res
1502
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict via to_screen().

    Formats whose 'preference' is below -1000 are not listed.  When more
    than one format is listed, the first row is tagged '(worst)' and the
    last row '(best)'.
    """
    def line(format, idlen=20):
        # One table row; the id column is one wider than the longest id
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    idlen = max(
        [len('format code')] + [len(f['format_id']) for f in formats])

    # Label the rows from the same filtered list that produced them, so an
    # all-hidden list cannot raise IndexError and the spacing before each
    # label reflects the note of the row being labelled.
    visible = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    formats_s = [line(f, idlen) for f in visible]
    if len(visible) > 1:
        formats_s[0] += (' ' if self._format_note(visible[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(visible[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen(
        '[info] Available formats for %s:\n%s\n%s' %
        (info_dict['id'], header_line, '\n'.join(formats_s)))
1528
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of info_dict's thumbnails.

    Falls back to the single 'thumbnail' URL when there is no 'thumbnails'
    list, and prints a notice when neither is present.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if tn_url:
            thumbnails = [{'id': '0', 'url': tn_url}]
        else:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    # Thumbnail dicts are not guaranteed to carry an 'id'; use the list
    # position for those that do not instead of failing with KeyError.
    rows = [
        [t['id'] if t.get('id') is not None else '%d' % i,
         t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
        for i, t in enumerate(thumbnails)]
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
1545
def urlopen(self, req):
    """Open req (a URL string or a request object) through self._opener.

    The URL is percent-escaped with escape_url() first; the call uses
    self._socket_timeout as timeout.
    """
    # According to RFC 3986, URLs can not contain non-ASCII characters;
    # however this is not always respected by websites, some tend to give
    # out URLs with non percent-encoded non-ASCII characters (see
    # telemb.py, ard.py [#3412]).  urllib chokes on such URLs (see
    # http://bugs.python.org/issue3991), so the request's original URL is
    # replaced with the percent-encoded one.
    string_type = basestring if sys.version_info < (3, 0) else compat_str
    if isinstance(req, string_type):
        escaped = escape_url(req)
        if req != escaped:
            req = escaped
    else:
        original_url = req.get_full_url()
        escaped = escape_url(original_url)
        if original_url != escaped:
            # Rebuild the request around the escaped URL, keeping the rest
            req = compat_urllib_request.Request(
                escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
1569
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; does nothing unless 'verbose' is set.

    Reports encodings, the youtube-dl version, the git HEAD (when it can
    be determined), the Python version, external program versions and the
    proxy map.  With the 'call_home' param it also reports the public IP
    address and warns about outdated versions.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # The git probe is best effort only.  Catching Exception rather
        # than everything lets KeyboardInterrupt and SystemExit through.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
1634
def _setup_opener(self):
    """Set up self._socket_timeout, self.cookiejar and self._opener from self.params.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An explicitly empty proxy option yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    built_opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    built_opener.addheaders = []
    self._opener = built_opener
1674
def encode(self, s):
    """Encode s with the encoding returned by get_encoding().

    bytes input is returned unchanged.  On UnicodeEncodeError a hint about
    the --encoding option is appended to the error's reason before it is
    re-raised.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        target_encoding = self.get_encoding()
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
1684
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
1690
1691 def _write_thumbnails(self, info_dict, filename):
1692 if self.params.get('writethumbnail', False):
1693 thumbnails = info_dict.get('thumbnails')
1694 if thumbnails:
1695 thumbnails = [thumbnails[-1]]
1696 elif self.params.get('write_all_thumbnails', False):
1697 thumbnails = info_dict.get('thumbnails')
1698 else:
1699 return
1700
1701 if not thumbnails:
1702 # No thumbnails present, so return immediately
1703 return
1704
1705 for t in thumbnails:
1706 thumb_ext = determine_ext(t['url'], 'jpg')
1707 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1708 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1709 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1710
1711 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1712 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1713 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1714 else:
1715 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1716 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1717 try:
1718 uf = self.urlopen(t['url'])
1719 with open(thumb_filename, 'wb') as thumbf:
1720 shutil.copyfileobj(uf, thumbf)
1721 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1722 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1723 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1724 self.report_warning('Unable to download thumbnail "%s": %s' %
1725 (t['url'], compat_str(err)))