]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Use the option in preparing the merge output filename
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import os
14 import platform
15 import re
16 import shutil
17 import subprocess
18 import socket
19 import sys
20 import time
21 import traceback
22
23 if os.name == 'nt':
24 import ctypes
25
26 from .compat import (
27 compat_cookiejar,
28 compat_expanduser,
29 compat_http_client,
30 compat_kwargs,
31 compat_str,
32 compat_urllib_error,
33 compat_urllib_request,
34 )
35 from .utils import (
36 escape_url,
37 ContentTooShortError,
38 date_from_str,
39 DateRange,
40 DEFAULT_OUTTMPL,
41 determine_ext,
42 DownloadError,
43 encodeFilename,
44 ExtractorError,
45 format_bytes,
46 formatSeconds,
47 get_term_width,
48 locked_file,
49 make_HTTPS_handler,
50 MaxDownloadsReached,
51 PagedList,
52 PostProcessingError,
53 platform_name,
54 preferredencoding,
55 SameFileError,
56 sanitize_filename,
57 subtitles_filename,
58 takewhile_inclusive,
59 UnavailableVideoError,
60 url_basename,
61 write_json_file,
62 write_string,
63 YoutubeDLHandler,
64 prepend_extension,
65 args_to_str,
66 age_restricted,
67 )
68 from .cache import Cache
69 from .extractor import get_info_extractor, gen_extractors
70 from .downloader import get_suitable_downloader
71 from .downloader.rtmp import rtmpdump_version
72 from .postprocessor import (
73 FFmpegMergerPP,
74 FFmpegPostProcessor,
75 get_postprocessor,
76 )
77 from .version import __version__
78
79
80 class YoutubeDL(object):
81 """YoutubeDL class.
82
83 YoutubeDL objects are the ones responsible of downloading the
84 actual video file and writing it to disk if the user has requested
85 it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information — a task that InfoExtractors
    handle — it has to pass the URL to one of them.
89
90 For this, YoutubeDL objects have a method that allows
91 InfoExtractors to be registered in a given order. When it is passed
92 a URL, the YoutubeDL object handles it to the first InfoExtractor it
93 finds that reports being able to handle it. The InfoExtractor extracts
94 all the information about the video or videos the URL refers to, and
95 YoutubeDL process the extracted information, possibly using a File
96 Downloader to download the video.
97
98 YoutubeDL objects accept a lot of parameters. In order not to saturate
99 the object constructor with arguments, it receives a dictionary of
100 options instead. These options are available through the params
101 attribute for the InfoExtractors to use. The YoutubeDL also
102 registers itself as the downloader in charge for the InfoExtractors
103 that are added to it, so this is a "mutual registration".
104
105 Available options:
106
107 username: Username for authentication purposes.
108 password: Password for authentication purposes.
    videopassword:     Password for accessing a video.
110 usenetrc: Use netrc for authentication instead.
111 verbose: Print additional info to stdout.
112 quiet: Do not print messages to stdout.
113 no_warnings: Do not print out anything for warnings.
114 forceurl: Force printing final URL.
115 forcetitle: Force printing title.
116 forceid: Force printing ID.
117 forcethumbnail: Force printing thumbnail URL.
118 forcedescription: Force printing description.
119 forcefilename: Force printing final filename.
120 forceduration: Force printing duration.
121 forcejson: Force printing info_dict as JSON.
122 dump_single_json: Force printing the info_dict of the whole playlist
123 (or video) as a single JSON line.
124 simulate: Do not download the video files.
125 format: Video format code. See options.py for more information.
126 format_limit: Highest quality format to try.
127 outtmpl: Template for output names.
128 restrictfilenames: Do not allow "&" and spaces in file names
129 ignoreerrors: Do not stop on download errors.
130 nooverwrites: Prevent overwriting files.
131 playliststart: Playlist item to start at.
132 playlistend: Playlist item to end at.
133 playlistreverse: Download playlist items in reverse order.
134 matchtitle: Download only matching titles.
135 rejecttitle: Reject downloads for matching titles.
136 logger: Log messages to a logging.Logger instance.
137 logtostderr: Log messages to stderr instead of stdout.
138 writedescription: Write the video description to a .description file
139 writeinfojson: Write the video description to a .info.json file
140 writeannotations: Write the video annotations to a .annotations.xml file
141 writethumbnail: Write the thumbnail image to a file
142 writesubtitles: Write the video subtitles to a file
143 writeautomaticsub: Write the automatic subtitles to a file
144 allsubtitles: Downloads all the subtitles of the video
145 (requires writesubtitles or writeautomaticsub)
146 listsubtitles: Lists all available subtitles for the video
147 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
148 subtitleslangs: List of languages of the subtitles to download
149 keepvideo: Keep the video file after post-processing
150 daterange: A DateRange object, download only if the upload_date is in the range.
151 skip_download: Skip the actual download of the video file
152 cachedir: Location of the cache files in the filesystem.
153 False to disable filesystem cache.
154 noplaylist: Download single video instead of a playlist if in doubt.
155 age_limit: An integer representing the user's age in years.
156 Unsuitable videos for the given age are skipped.
157 min_views: An integer representing the minimum view count the video
158 must have in order to not be skipped.
159 Videos without view count information are always
160 downloaded. None for no limit.
161 max_views: An integer representing the maximum view count.
162 Videos that are more popular than that are not
163 downloaded.
164 Videos without view count information are always
165 downloaded. None for no limit.
166 download_archive: File name of a file where all downloads are recorded.
167 Videos already present in the file are not downloaded
168 again.
169 cookiefile: File name where cookies should be read from and dumped to.
170 nocheckcertificate:Do not verify SSL certificates
171 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
172 At the moment, this is only supported by YouTube.
173 proxy: URL of the proxy server to use
174 socket_timeout: Time to wait for unresponsive hosts, in seconds
175 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
177 debug_printtraffic:Print out sent and received HTTP traffic
178 include_ads: Download ads as well
179 default_search: Prepend this string if an input url is not valid.
180 'auto' for elaborate guessing
181 encoding: Use this encoding instead of the system-specified.
182 extract_flat: Do not resolve URLs, return the immediate result.
183 Pass in 'in_playlist' to only show this behavior for
184 playlist items.
185 postprocessors: A list of dictionaries, each with an entry
186 * key: The name of the postprocessor. See
187 youtube_dl/postprocessor/__init__.py for a list.
188 as well as any further keyword arguments for the
189 postprocessor.
190 progress_hooks: A list of functions that get called on download
191 progress, with a dictionary with the entries
192 * filename: The final filename
193 * status: One of "downloading" and "finished"
194
195 The dict may also have some of the following entries:
196
197 * downloaded_bytes: Bytes on disk
198 * total_bytes: Size of the whole file, None if unknown
199 * tmpfilename: The filename we're currently writing to
200 * eta: The estimated time in seconds, None if unknown
201 * speed: The download speed in bytes/second, None if
202 unknown
203
204 Progress hooks are guaranteed to be called at least once
205 (with status "finished") if the download is successful.
206
207
208 The following parameters are not used by YoutubeDL itself, they are used by
209 the FileDownloader:
210 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
211 noresizebuffer, retries, continuedl, noprogress, consoletitle
212
213 The following options are used by the post processors:
214 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
215 otherwise prefer avconv.
216 exec_cmd: Arbitrary command to run after downloading
217 """
218
219 params = None
220 _ies = []
221 _pps = []
222 _download_retcode = None
223 _num_downloads = None
224 _screen_file = None
225
226 def __init__(self, params=None, auto_init=True):
227 """Create a FileDownloader object with the given options."""
228 if params is None:
229 params = {}
230 self._ies = []
231 self._ies_instances = {}
232 self._pps = []
233 self._progress_hooks = []
234 self._download_retcode = 0
235 self._num_downloads = 0
236 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
237 self._err_file = sys.stderr
238 self.params = params
239 self.cache = Cache(self)
240
241 if params.get('bidi_workaround', False):
242 try:
243 import pty
244 master, slave = pty.openpty()
245 width = get_term_width()
246 if width is None:
247 width_args = []
248 else:
249 width_args = ['-w', str(width)]
250 sp_kwargs = dict(
251 stdin=subprocess.PIPE,
252 stdout=slave,
253 stderr=self._err_file)
254 try:
255 self._output_process = subprocess.Popen(
256 ['bidiv'] + width_args, **sp_kwargs
257 )
258 except OSError:
259 self._output_process = subprocess.Popen(
260 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
261 self._output_channel = os.fdopen(master, 'rb')
262 except OSError as ose:
263 if ose.errno == 2:
264 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
265 else:
266 raise
267
268 if (sys.version_info >= (3,) and sys.platform != 'win32' and
269 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
270 and not params.get('restrictfilenames', False)):
271 # On Python 3, the Unicode filesystem API will throw errors (#1474)
272 self.report_warning(
273 'Assuming --restrict-filenames since file system encoding '
274 'cannot encode all characters. '
275 'Set the LC_ALL environment variable to fix this.')
276 self.params['restrictfilenames'] = True
277
278 if '%(stitle)s' in self.params.get('outtmpl', ''):
279 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
280
281 self._setup_opener()
282
283 if auto_init:
284 self.print_debug_header()
285 self.add_default_info_extractors()
286
287 for pp_def_raw in self.params.get('postprocessors', []):
288 pp_class = get_postprocessor(pp_def_raw['key'])
289 pp_def = dict(pp_def_raw)
290 del pp_def['key']
291 pp = pp_class(self, **compat_kwargs(pp_def))
292 self.add_post_processor(pp)
293
294 for ph in self.params.get('progress_hooks', []):
295 self.add_progress_hook(ph)
296
297 def warn_if_short_id(self, argv):
298 # short YouTube ID starting with dash?
299 idxs = [
300 i for i, a in enumerate(argv)
301 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
302 if idxs:
303 correct_argv = (
304 ['youtube-dl'] +
305 [a for i, a in enumerate(argv) if i not in idxs] +
306 ['--'] + [argv[i] for i in idxs]
307 )
308 self.report_warning(
309 'Long argument string detected. '
310 'Use -- to separate parameters and URLs, like this:\n%s\n' %
311 args_to_str(correct_argv))
312
313 def add_info_extractor(self, ie):
314 """Add an InfoExtractor object to the end of the list."""
315 self._ies.append(ie)
316 self._ies_instances[ie.ie_key()] = ie
317 ie.set_downloader(self)
318
319 def get_info_extractor(self, ie_key):
320 """
321 Get an instance of an IE with name ie_key, it will try to get one from
322 the _ies list, if there's no instance it will create a new one and add
323 it to the extractor list.
324 """
325 ie = self._ies_instances.get(ie_key)
326 if ie is None:
327 ie = get_info_extractor(ie_key)()
328 self.add_info_extractor(ie)
329 return ie
330
331 def add_default_info_extractors(self):
332 """
333 Add the InfoExtractors returned by gen_extractors to the end of the list
334 """
335 for ie in gen_extractors():
336 self.add_info_extractor(ie)
337
338 def add_post_processor(self, pp):
339 """Add a PostProcessor object to the end of the chain."""
340 self._pps.append(pp)
341 pp.set_downloader(self)
342
343 def add_progress_hook(self, ph):
344 """Add the progress hook (currently only for the file downloader)"""
345 self._progress_hooks.append(ph)
346
347 def _bidi_workaround(self, message):
348 if not hasattr(self, '_output_channel'):
349 return message
350
351 assert hasattr(self, '_output_process')
352 assert isinstance(message, compat_str)
353 line_count = message.count('\n') + 1
354 self._output_process.stdin.write((message + '\n').encode('utf-8'))
355 self._output_process.stdin.flush()
356 res = ''.join(self._output_channel.readline().decode('utf-8')
357 for _ in range(line_count))
358 return res[:-len('\n')]
359
360 def to_screen(self, message, skip_eol=False):
361 """Print message to stdout if not in quiet mode."""
362 return self.to_stdout(message, skip_eol, check_quiet=True)
363
364 def _write_string(self, s, out=None):
365 write_string(s, out=out, encoding=self.params.get('encoding'))
366
367 def to_stdout(self, message, skip_eol=False, check_quiet=False):
368 """Print message to stdout if not in quiet mode."""
369 if self.params.get('logger'):
370 self.params['logger'].debug(message)
371 elif not check_quiet or not self.params.get('quiet', False):
372 message = self._bidi_workaround(message)
373 terminator = ['\n', ''][skip_eol]
374 output = message + terminator
375
376 self._write_string(output, self._screen_file)
377
378 def to_stderr(self, message):
379 """Print message to stderr."""
380 assert isinstance(message, compat_str)
381 if self.params.get('logger'):
382 self.params['logger'].error(message)
383 else:
384 message = self._bidi_workaround(message)
385 output = message + '\n'
386 self._write_string(output, self._err_file)
387
388 def to_console_title(self, message):
389 if not self.params.get('consoletitle', False):
390 return
391 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
392 # c_wchar_p() might not be necessary if `message` is
393 # already of type unicode()
394 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
395 elif 'TERM' in os.environ:
396 self._write_string('\033]0;%s\007' % message, self._screen_file)
397
398 def save_console_title(self):
399 if not self.params.get('consoletitle', False):
400 return
401 if 'TERM' in os.environ:
402 # Save the title on stack
403 self._write_string('\033[22;0t', self._screen_file)
404
405 def restore_console_title(self):
406 if not self.params.get('consoletitle', False):
407 return
408 if 'TERM' in os.environ:
409 # Restore the title from stack
410 self._write_string('\033[23;0t', self._screen_file)
411
412 def __enter__(self):
413 self.save_console_title()
414 return self
415
416 def __exit__(self, *args):
417 self.restore_console_title()
418
419 if self.params.get('cookiefile') is not None:
420 self.cookiejar.save()
421
422 def trouble(self, message=None, tb=None):
423 """Determine action to take when a download problem appears.
424
425 Depending on if the downloader has been configured to ignore
426 download errors or not, this method may throw an exception or
427 not when errors are found, after printing the message.
428
429 tb, if given, is additional traceback information.
430 """
431 if message is not None:
432 self.to_stderr(message)
433 if self.params.get('verbose'):
434 if tb is None:
435 if sys.exc_info()[0]: # if .trouble has been called from an except block
436 tb = ''
437 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
438 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
439 tb += compat_str(traceback.format_exc())
440 else:
441 tb_data = traceback.format_list(traceback.extract_stack())
442 tb = ''.join(tb_data)
443 self.to_stderr(tb)
444 if not self.params.get('ignoreerrors', False):
445 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
446 exc_info = sys.exc_info()[1].exc_info
447 else:
448 exc_info = sys.exc_info()
449 raise DownloadError(message, exc_info)
450 self._download_retcode = 1
451
452 def report_warning(self, message):
453 '''
454 Print the message to stderr, it will be prefixed with 'WARNING:'
455 If stderr is a tty file the 'WARNING:' will be colored
456 '''
457 if self.params.get('logger') is not None:
458 self.params['logger'].warning(message)
459 else:
460 if self.params.get('no_warnings'):
461 return
462 if self._err_file.isatty() and os.name != 'nt':
463 _msg_header = '\033[0;33mWARNING:\033[0m'
464 else:
465 _msg_header = 'WARNING:'
466 warning_message = '%s %s' % (_msg_header, message)
467 self.to_stderr(warning_message)
468
469 def report_error(self, message, tb=None):
470 '''
471 Do the same as trouble, but prefixes the message with 'ERROR:', colored
472 in red if stderr is a tty file.
473 '''
474 if self._err_file.isatty() and os.name != 'nt':
475 _msg_header = '\033[0;31mERROR:\033[0m'
476 else:
477 _msg_header = 'ERROR:'
478 error_message = '%s %s' % (_msg_header, message)
479 self.trouble(error_message, tb)
480
481 def report_file_already_downloaded(self, file_name):
482 """Report file has already been fully downloaded."""
483 try:
484 self.to_screen('[download] %s has already been downloaded' % file_name)
485 except UnicodeEncodeError:
486 self.to_screen('[download] The file has already been downloaded')
487
488 def prepare_filename(self, info_dict):
489 """Generate the output filename."""
490 try:
491 template_dict = dict(info_dict)
492
493 template_dict['epoch'] = int(time.time())
494 autonumber_size = self.params.get('autonumber_size')
495 if autonumber_size is None:
496 autonumber_size = 5
497 autonumber_templ = '%0' + str(autonumber_size) + 'd'
498 template_dict['autonumber'] = autonumber_templ % self._num_downloads
499 if template_dict.get('playlist_index') is not None:
500 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
501 if template_dict.get('resolution') is None:
502 if template_dict.get('width') and template_dict.get('height'):
503 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
504 elif template_dict.get('height'):
505 template_dict['resolution'] = '%sp' % template_dict['height']
506 elif template_dict.get('width'):
507 template_dict['resolution'] = '?x%d' % template_dict['width']
508
509 sanitize = lambda k, v: sanitize_filename(
510 compat_str(v),
511 restricted=self.params.get('restrictfilenames'),
512 is_id=(k == 'id'))
513 template_dict = dict((k, sanitize(k, v))
514 for k, v in template_dict.items()
515 if v is not None)
516 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
517
518 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
519 tmpl = compat_expanduser(outtmpl)
520 filename = tmpl % template_dict
521 return filename
522 except ValueError as err:
523 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
524 return None
525
526 def _match_entry(self, info_dict):
527 """ Returns None iff the file should be downloaded """
528
529 video_title = info_dict.get('title', info_dict.get('id', 'video'))
530 if 'title' in info_dict:
531 # This can happen when we're just evaluating the playlist
532 title = info_dict['title']
533 matchtitle = self.params.get('matchtitle', False)
534 if matchtitle:
535 if not re.search(matchtitle, title, re.IGNORECASE):
536 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
537 rejecttitle = self.params.get('rejecttitle', False)
538 if rejecttitle:
539 if re.search(rejecttitle, title, re.IGNORECASE):
540 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
541 date = info_dict.get('upload_date', None)
542 if date is not None:
543 dateRange = self.params.get('daterange', DateRange())
544 if date not in dateRange:
545 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
546 view_count = info_dict.get('view_count', None)
547 if view_count is not None:
548 min_views = self.params.get('min_views')
549 if min_views is not None and view_count < min_views:
550 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
551 max_views = self.params.get('max_views')
552 if max_views is not None and view_count > max_views:
553 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
554 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
555 return 'Skipping "%s" because it is age restricted' % title
556 if self.in_download_archive(info_dict):
557 return '%s has already been recorded in archive' % video_title
558 return None
559
560 @staticmethod
561 def add_extra_info(info_dict, extra_info):
562 '''Set the keys from extra_info in info dict if they are missing'''
563 for key, value in extra_info.items():
564 info_dict.setdefault(key, value)
565
566 def extract_info(self, url, download=True, ie_key=None, extra_info={},
567 process=True):
568 '''
569 Returns a list with a dictionary for each video we find.
570 If 'download', also downloads the videos.
571 extra_info is a dict containing the extra values to add to each result
572 '''
573
574 if ie_key:
575 ies = [self.get_info_extractor(ie_key)]
576 else:
577 ies = self._ies
578
579 for ie in ies:
580 if not ie.suitable(url):
581 continue
582
583 if not ie.working():
584 self.report_warning('The program functionality for this site has been marked as broken, '
585 'and will probably not work.')
586
587 try:
588 ie_result = ie.extract(url)
589 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
590 break
591 if isinstance(ie_result, list):
592 # Backwards compatibility: old IE result format
593 ie_result = {
594 '_type': 'compat_list',
595 'entries': ie_result,
596 }
597 self.add_default_extra_info(ie_result, ie, url)
598 if process:
599 return self.process_ie_result(ie_result, download, extra_info)
600 else:
601 return ie_result
602 except ExtractorError as de: # An error we somewhat expected
603 self.report_error(compat_str(de), de.format_traceback())
604 break
605 except MaxDownloadsReached:
606 raise
607 except Exception as e:
608 if self.params.get('ignoreerrors', False):
609 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
610 break
611 else:
612 raise
613 else:
614 self.report_error('no suitable InfoExtractor for URL %s' % url)
615
616 def add_default_extra_info(self, ie_result, ie, url):
617 self.add_extra_info(ie_result, {
618 'extractor': ie.IE_NAME,
619 'webpage_url': url,
620 'webpage_url_basename': url_basename(url),
621 'extractor_key': ie.ie_key(),
622 })
623
624 def process_ie_result(self, ie_result, download=True, extra_info={}):
625 """
626 Take the result of the ie(may be modified) and resolve all unresolved
627 references (URLs, playlist items).
628
629 It will also download the videos if 'download'.
630 Returns the resolved ie_result.
631 """
632
633 result_type = ie_result.get('_type', 'video')
634
635 if result_type in ('url', 'url_transparent'):
636 extract_flat = self.params.get('extract_flat', False)
637 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
638 extract_flat is True):
639 if self.params.get('forcejson', False):
640 self.to_stdout(json.dumps(ie_result))
641 return ie_result
642
643 if result_type == 'video':
644 self.add_extra_info(ie_result, extra_info)
645 return self.process_video_result(ie_result, download=download)
646 elif result_type == 'url':
647 # We have to add extra_info to the results because it may be
648 # contained in a playlist
649 return self.extract_info(ie_result['url'],
650 download,
651 ie_key=ie_result.get('ie_key'),
652 extra_info=extra_info)
653 elif result_type == 'url_transparent':
654 # Use the information from the embedding page
655 info = self.extract_info(
656 ie_result['url'], ie_key=ie_result.get('ie_key'),
657 extra_info=extra_info, download=False, process=False)
658
659 force_properties = dict(
660 (k, v) for k, v in ie_result.items() if v is not None)
661 for f in ('_type', 'url'):
662 if f in force_properties:
663 del force_properties[f]
664 new_result = info.copy()
665 new_result.update(force_properties)
666
667 assert new_result.get('_type') != 'url_transparent'
668
669 return self.process_ie_result(
670 new_result, download=download, extra_info=extra_info)
671 elif result_type == 'playlist' or result_type == 'multi_video':
672 # We process each entry in the playlist
673 playlist = ie_result.get('title', None) or ie_result.get('id', None)
674 self.to_screen('[download] Downloading playlist: %s' % playlist)
675
676 playlist_results = []
677
678 playliststart = self.params.get('playliststart', 1) - 1
679 playlistend = self.params.get('playlistend', None)
680 # For backwards compatibility, interpret -1 as whole list
681 if playlistend == -1:
682 playlistend = None
683
684 ie_entries = ie_result['entries']
685 if isinstance(ie_entries, list):
686 n_all_entries = len(ie_entries)
687 entries = ie_entries[playliststart:playlistend]
688 n_entries = len(entries)
689 self.to_screen(
690 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
691 (ie_result['extractor'], playlist, n_all_entries, n_entries))
692 elif isinstance(ie_entries, PagedList):
693 entries = ie_entries.getslice(
694 playliststart, playlistend)
695 n_entries = len(entries)
696 self.to_screen(
697 "[%s] playlist %s: Downloading %d videos" %
698 (ie_result['extractor'], playlist, n_entries))
699 else: # iterable
700 entries = list(itertools.islice(
701 ie_entries, playliststart, playlistend))
702 n_entries = len(entries)
703 self.to_screen(
704 "[%s] playlist %s: Downloading %d videos" %
705 (ie_result['extractor'], playlist, n_entries))
706
707 if self.params.get('playlistreverse', False):
708 entries = entries[::-1]
709
710 for i, entry in enumerate(entries, 1):
711 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
712 extra = {
713 'n_entries': n_entries,
714 'playlist': playlist,
715 'playlist_id': ie_result.get('id'),
716 'playlist_title': ie_result.get('title'),
717 'playlist_index': i + playliststart,
718 'extractor': ie_result['extractor'],
719 'webpage_url': ie_result['webpage_url'],
720 'webpage_url_basename': url_basename(ie_result['webpage_url']),
721 'extractor_key': ie_result['extractor_key'],
722 }
723
724 reason = self._match_entry(entry)
725 if reason is not None:
726 self.to_screen('[download] ' + reason)
727 continue
728
729 entry_result = self.process_ie_result(entry,
730 download=download,
731 extra_info=extra)
732 playlist_results.append(entry_result)
733 ie_result['entries'] = playlist_results
734 return ie_result
735 elif result_type == 'compat_list':
736 self.report_warning(
737 'Extractor %s returned a compat_list result. '
738 'It needs to be updated.' % ie_result.get('extractor'))
739
740 def _fixup(r):
741 self.add_extra_info(
742 r,
743 {
744 'extractor': ie_result['extractor'],
745 'webpage_url': ie_result['webpage_url'],
746 'webpage_url_basename': url_basename(ie_result['webpage_url']),
747 'extractor_key': ie_result['extractor_key'],
748 }
749 )
750 return r
751 ie_result['entries'] = [
752 self.process_ie_result(_fixup(r), download, extra_info)
753 for r in ie_result['entries']
754 ]
755 return ie_result
756 else:
757 raise Exception('Invalid result type: %s' % result_type)
758
759 def select_format(self, format_spec, available_formats):
760 if format_spec == 'best' or format_spec is None:
761 return available_formats[-1]
762 elif format_spec == 'worst':
763 return available_formats[0]
764 elif format_spec == 'bestaudio':
765 audio_formats = [
766 f for f in available_formats
767 if f.get('vcodec') == 'none']
768 if audio_formats:
769 return audio_formats[-1]
770 elif format_spec == 'worstaudio':
771 audio_formats = [
772 f for f in available_formats
773 if f.get('vcodec') == 'none']
774 if audio_formats:
775 return audio_formats[0]
776 elif format_spec == 'bestvideo':
777 video_formats = [
778 f for f in available_formats
779 if f.get('acodec') == 'none']
780 if video_formats:
781 return video_formats[-1]
782 elif format_spec == 'worstvideo':
783 video_formats = [
784 f for f in available_formats
785 if f.get('acodec') == 'none']
786 if video_formats:
787 return video_formats[0]
788 else:
789 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
790 if format_spec in extensions:
791 filter_f = lambda f: f['ext'] == format_spec
792 else:
793 filter_f = lambda f: f['format_id'] == format_spec
794 matches = list(filter(filter_f, available_formats))
795 if matches:
796 return matches[-1]
797 return None
798
799 def process_video_result(self, info_dict, download=True):
800 assert info_dict.get('_type', 'video') == 'video'
801
802 if 'id' not in info_dict:
803 raise ExtractorError('Missing "id" field in extractor result')
804 if 'title' not in info_dict:
805 raise ExtractorError('Missing "title" field in extractor result')
806
807 if 'playlist' not in info_dict:
808 # It isn't part of a playlist
809 info_dict['playlist'] = None
810 info_dict['playlist_index'] = None
811
812 thumbnails = info_dict.get('thumbnails')
813 if thumbnails:
814 thumbnails.sort(key=lambda t: (
815 t.get('width'), t.get('height'), t.get('url')))
816 for t in thumbnails:
817 if 'width' in t and 'height' in t:
818 t['resolution'] = '%dx%d' % (t['width'], t['height'])
819
820 if thumbnails and 'thumbnail' not in info_dict:
821 info_dict['thumbnail'] = thumbnails[-1]['url']
822
823 if 'display_id' not in info_dict and 'id' in info_dict:
824 info_dict['display_id'] = info_dict['id']
825
826 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
827 # Working around negative timestamps in Windows
828 # (see http://bugs.python.org/issue1646728)
829 if info_dict['timestamp'] < 0 and os.name == 'nt':
830 info_dict['timestamp'] = 0
831 upload_date = datetime.datetime.utcfromtimestamp(
832 info_dict['timestamp'])
833 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
834
835 # This extractors handle format selection themselves
836 if info_dict['extractor'] in ['Youku']:
837 if download:
838 self.process_info(info_dict)
839 return info_dict
840
841 # We now pick which formats have to be downloaded
842 if info_dict.get('formats') is None:
843 # There's only one format available
844 formats = [info_dict]
845 else:
846 formats = info_dict['formats']
847
848 if not formats:
849 raise ExtractorError('No video formats found!')
850
851 # We check that all the formats have the format and format_id fields
852 for i, format in enumerate(formats):
853 if 'url' not in format:
854 raise ExtractorError('Missing "url" key in result (index %d)' % i)
855
856 if format.get('format_id') is None:
857 format['format_id'] = compat_str(i)
858 if format.get('format') is None:
859 format['format'] = '{id} - {res}{note}'.format(
860 id=format['format_id'],
861 res=self.format_resolution(format),
862 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
863 )
864 # Automatically determine file extension if missing
865 if 'ext' not in format:
866 format['ext'] = determine_ext(format['url']).lower()
867
868 format_limit = self.params.get('format_limit', None)
869 if format_limit:
870 formats = list(takewhile_inclusive(
871 lambda f: f['format_id'] != format_limit, formats
872 ))
873
874 # TODO Central sorting goes here
875
876 if formats[0] is not info_dict:
877 # only set the 'formats' fields if the original info_dict list them
878 # otherwise we end up with a circular reference, the first (and unique)
879 # element in the 'formats' field in info_dict is info_dict itself,
880 # wich can't be exported to json
881 info_dict['formats'] = formats
882 if self.params.get('listformats', None):
883 self.list_formats(info_dict)
884 return
885
886 req_format = self.params.get('format')
887 if req_format is None:
888 req_format = 'best'
889 formats_to_download = []
890 # The -1 is for supporting YoutubeIE
891 if req_format in ('-1', 'all'):
892 formats_to_download = formats
893 else:
894 for rfstr in req_format.split(','):
895 # We can accept formats requested in the format: 34/5/best, we pick
896 # the first that is available, starting from left
897 req_formats = rfstr.split('/')
898 for rf in req_formats:
899 if re.match(r'.+?\+.+?', rf) is not None:
900 # Two formats have been requested like '137+139'
901 format_1, format_2 = rf.split('+')
902 formats_info = (self.select_format(format_1, formats),
903 self.select_format(format_2, formats))
904 if all(formats_info):
905 # The first format must contain the video and the
906 # second the audio
907 if formats_info[0].get('vcodec') == 'none':
908 self.report_error('The first format must '
909 'contain the video, try using '
910 '"-f %s+%s"' % (format_2, format_1))
911 return
912 selected_format = {
913 'requested_formats': formats_info,
914 'format': rf,
915 'ext': self.params['merge_output_format'] if self.params['merge_output_format'] is not None else formats_info[0]['ext'],
916 }
917 else:
918 selected_format = None
919 else:
920 selected_format = self.select_format(rf, formats)
921 if selected_format is not None:
922 formats_to_download.append(selected_format)
923 break
924 if not formats_to_download:
925 raise ExtractorError('requested format not available',
926 expected=True)
927
928 if download:
929 if len(formats_to_download) > 1:
930 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
931 for format in formats_to_download:
932 new_info = dict(info_dict)
933 new_info.update(format)
934 self.process_info(new_info)
935 # We update the info dict with the best quality format (backwards compatibility)
936 info_dict.update(formats_to_download[-1])
937 return info_dict
938
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printings, side files (description, annotations,
        subtitles, info JSON, thumbnail), the media download itself
        (including multi-format downloads to be merged), postprocessing,
        and recording the entry in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Honor --max-downloads before doing any work for this entry
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        # Overlong titles are truncated (with ellipsis) to keep filenames sane
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply the user's filters (--match-title, --date, etc.)
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            # Only annotate; the JSON itself is printed by download()
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                else:
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed thumbnail is not fatal to the download
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    # Helper: pick the right FileDownloader for this info
                    # dict, attach progress hooks, and run it
                    def dl(name, info):
                        fd = get_suitable_downloader(info)(self, self.params)
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        if self.params.get('verbose'):
                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                        return fd.download(name, info)
                    if info_dict.get('requested_formats') is not None:
                        # Multiple formats (e.g. '137+139'): download each to
                        # a 'fN' intermediate file and merge afterwards
                        downloaded = []
                        success = True
                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                        if not merger._executable:
                            postprocessors = []
                            self.report_warning('You have requested multiple '
                                                'formats but ffmpeg or avconv are not installed.'
                                                ' The formats won\'t be merged')
                        else:
                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                    else:
                        # Just a single file
                        success = dl(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                    return
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
1149
1150 def download(self, url_list):
1151 """Download a given list of URLs."""
1152 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1153 if (len(url_list) > 1 and
1154 '%' not in outtmpl
1155 and self.params.get('max_downloads') != 1):
1156 raise SameFileError(outtmpl)
1157
1158 for url in url_list:
1159 try:
1160 # It also downloads the videos
1161 res = self.extract_info(url)
1162 except UnavailableVideoError:
1163 self.report_error('unable to download video')
1164 except MaxDownloadsReached:
1165 self.to_screen('[info] Maximum number of downloaded files reached.')
1166 raise
1167 else:
1168 if self.params.get('dump_single_json', False):
1169 self.to_stdout(json.dumps(res))
1170
1171 return self._download_retcode
1172
1173 def download_with_info_file(self, info_filename):
1174 with io.open(info_filename, 'r', encoding='utf-8') as f:
1175 info = json.load(f)
1176 try:
1177 self.process_ie_result(info, download=True)
1178 except DownloadError:
1179 webpage_url = info.get('webpage_url')
1180 if webpage_url is not None:
1181 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1182 return self.download([webpage_url])
1183 else:
1184 raise
1185 return self._download_retcode
1186
1187 def post_process(self, filename, ie_info):
1188 """Run all the postprocessors on the given file."""
1189 info = dict(ie_info)
1190 info['filepath'] = filename
1191 keep_video = None
1192 pps_chain = []
1193 if ie_info.get('__postprocessors') is not None:
1194 pps_chain.extend(ie_info['__postprocessors'])
1195 pps_chain.extend(self._pps)
1196 for pp in pps_chain:
1197 try:
1198 keep_video_wish, new_info = pp.run(info)
1199 if keep_video_wish is not None:
1200 if keep_video_wish:
1201 keep_video = keep_video_wish
1202 elif keep_video is None:
1203 # No clear decision yet, let IE decide
1204 keep_video = keep_video_wish
1205 except PostProcessingError as e:
1206 self.report_error(e.msg)
1207 if keep_video is False and not self.params.get('keepvideo', False):
1208 try:
1209 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1210 os.remove(encodeFilename(filename))
1211 except (IOError, OSError):
1212 self.report_warning('Unable to remove downloaded video file')
1213
1214 def _make_archive_id(self, info_dict):
1215 # Future-proof against any change in case
1216 # and backwards compatibility with prior versions
1217 extractor = info_dict.get('extractor_key')
1218 if extractor is None:
1219 if 'id' in info_dict:
1220 extractor = info_dict.get('ie_key') # key in a playlist
1221 if extractor is None:
1222 return None # Incomplete video information
1223 return extractor.lower() + ' ' + info_dict['id']
1224
1225 def in_download_archive(self, info_dict):
1226 fn = self.params.get('download_archive')
1227 if fn is None:
1228 return False
1229
1230 vid_id = self._make_archive_id(info_dict)
1231 if vid_id is None:
1232 return False # Incomplete video information
1233
1234 try:
1235 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1236 for line in archive_file:
1237 if line.strip() == vid_id:
1238 return True
1239 except IOError as ioe:
1240 if ioe.errno != errno.ENOENT:
1241 raise
1242 return False
1243
1244 def record_download_archive(self, info_dict):
1245 fn = self.params.get('download_archive')
1246 if fn is None:
1247 return
1248 vid_id = self._make_archive_id(info_dict)
1249 assert vid_id
1250 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1251 archive_file.write(vid_id + '\n')
1252
1253 @staticmethod
1254 def format_resolution(format, default='unknown'):
1255 if format.get('vcodec') == 'none':
1256 return 'audio only'
1257 if format.get('resolution') is not None:
1258 return format['resolution']
1259 if format.get('height') is not None:
1260 if format.get('width') is not None:
1261 res = '%sx%s' % (format['width'], format['height'])
1262 else:
1263 res = '%sp' % format['height']
1264 elif format.get('width') is not None:
1265 res = '?x%d' % format['width']
1266 else:
1267 res = default
1268 return res
1269
1270 def _format_note(self, fdict):
1271 res = ''
1272 if fdict.get('ext') in ['f4f', 'f4m']:
1273 res += '(unsupported) '
1274 if fdict.get('format_note') is not None:
1275 res += fdict['format_note'] + ' '
1276 if fdict.get('tbr') is not None:
1277 res += '%4dk ' % fdict['tbr']
1278 if fdict.get('container') is not None:
1279 if res:
1280 res += ', '
1281 res += '%s container' % fdict['container']
1282 if (fdict.get('vcodec') is not None and
1283 fdict.get('vcodec') != 'none'):
1284 if res:
1285 res += ', '
1286 res += fdict['vcodec']
1287 if fdict.get('vbr') is not None:
1288 res += '@'
1289 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1290 res += 'video@'
1291 if fdict.get('vbr') is not None:
1292 res += '%4dk' % fdict['vbr']
1293 if fdict.get('fps') is not None:
1294 res += ', %sfps' % fdict['fps']
1295 if fdict.get('acodec') is not None:
1296 if res:
1297 res += ', '
1298 if fdict['acodec'] == 'none':
1299 res += 'video only'
1300 else:
1301 res += '%-5s' % fdict['acodec']
1302 elif fdict.get('abr') is not None:
1303 if res:
1304 res += ', '
1305 res += 'audio'
1306 if fdict.get('abr') is not None:
1307 res += '@%3dk' % fdict['abr']
1308 if fdict.get('asr') is not None:
1309 res += ' (%5dHz)' % fdict['asr']
1310 if fdict.get('filesize') is not None:
1311 if res:
1312 res += ', '
1313 res += format_bytes(fdict['filesize'])
1314 elif fdict.get('filesize_approx') is not None:
1315 if res:
1316 res += ', '
1317 res += '~' + format_bytes(fdict['filesize_approx'])
1318 return res
1319
1320 def list_formats(self, info_dict):
1321 def line(format, idlen=20):
1322 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1323 format['format_id'],
1324 format['ext'],
1325 self.format_resolution(format),
1326 self._format_note(format),
1327 ))
1328
1329 formats = info_dict.get('formats', [info_dict])
1330 idlen = max(len('format code'),
1331 max(len(f['format_id']) for f in formats))
1332 formats_s = [
1333 line(f, idlen) for f in formats
1334 if f.get('preference') is None or f['preference'] >= -1000]
1335 if len(formats) > 1:
1336 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1337 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1338
1339 header_line = line({
1340 'format_id': 'format code', 'ext': 'extension',
1341 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1342 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1343 (info_dict['id'], header_line, '\n'.join(formats_s)))
1344
1345 def urlopen(self, req):
1346 """ Start an HTTP download """
1347
1348 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1349 # always respected by websites, some tend to give out URLs with non percent-encoded
1350 # non-ASCII characters (see telemb.py, ard.py [#3412])
1351 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1352 # To work around aforementioned issue we will replace request's original URL with
1353 # percent-encoded one
1354 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1355 url = req if req_is_string else req.get_full_url()
1356 url_escaped = escape_url(url)
1357
1358 # Substitute URL if any change after escaping
1359 if url != url_escaped:
1360 if req_is_string:
1361 req = url_escaped
1362 else:
1363 req = compat_urllib_request.Request(
1364 url_escaped, data=req.data, headers=req.headers,
1365 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1366
1367 return self._opener.open(req, timeout=self._socket_timeout)
1368
1369 def print_debug_header(self):
1370 if not self.params.get('verbose'):
1371 return
1372
1373 if type('') is not compat_str:
1374 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1375 self.report_warning(
1376 'Your Python is broken! Update to a newer and supported version')
1377
1378 stdout_encoding = getattr(
1379 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1380 encoding_str = (
1381 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1382 locale.getpreferredencoding(),
1383 sys.getfilesystemencoding(),
1384 stdout_encoding,
1385 self.get_encoding()))
1386 write_string(encoding_str, encoding=None)
1387
1388 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1389 try:
1390 sp = subprocess.Popen(
1391 ['git', 'rev-parse', '--short', 'HEAD'],
1392 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1393 cwd=os.path.dirname(os.path.abspath(__file__)))
1394 out, err = sp.communicate()
1395 out = out.decode().strip()
1396 if re.match('[0-9a-f]+', out):
1397 self._write_string('[debug] Git HEAD: ' + out + '\n')
1398 except:
1399 try:
1400 sys.exc_clear()
1401 except:
1402 pass
1403 self._write_string('[debug] Python version %s - %s\n' % (
1404 platform.python_version(), platform_name()))
1405
1406 exe_versions = FFmpegPostProcessor.get_versions()
1407 exe_versions['rtmpdump'] = rtmpdump_version()
1408 exe_str = ', '.join(
1409 '%s %s' % (exe, v)
1410 for exe, v in sorted(exe_versions.items())
1411 if v
1412 )
1413 if not exe_str:
1414 exe_str = 'none'
1415 self._write_string('[debug] exe versions: %s\n' % exe_str)
1416
1417 proxy_map = {}
1418 for handler in self._opener.handlers:
1419 if hasattr(handler, 'proxies'):
1420 proxy_map.update(handler.proxies)
1421 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1422
1423 def _setup_opener(self):
1424 timeout_val = self.params.get('socket_timeout')
1425 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1426
1427 opts_cookiefile = self.params.get('cookiefile')
1428 opts_proxy = self.params.get('proxy')
1429
1430 if opts_cookiefile is None:
1431 self.cookiejar = compat_cookiejar.CookieJar()
1432 else:
1433 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1434 opts_cookiefile)
1435 if os.access(opts_cookiefile, os.R_OK):
1436 self.cookiejar.load()
1437
1438 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1439 self.cookiejar)
1440 if opts_proxy is not None:
1441 if opts_proxy == '':
1442 proxies = {}
1443 else:
1444 proxies = {'http': opts_proxy, 'https': opts_proxy}
1445 else:
1446 proxies = compat_urllib_request.getproxies()
1447 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1448 if 'http' in proxies and 'https' not in proxies:
1449 proxies['https'] = proxies['http']
1450 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1451
1452 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1453 https_handler = make_HTTPS_handler(
1454 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1455 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1456 opener = compat_urllib_request.build_opener(
1457 https_handler, proxy_handler, cookie_processor, ydlh)
1458 # Delete the default user-agent header, which would otherwise apply in
1459 # cases where our custom HTTP handler doesn't come into play
1460 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1461 opener.addheaders = []
1462 self._opener = opener
1463
1464 def encode(self, s):
1465 if isinstance(s, bytes):
1466 return s # Already encoded
1467
1468 try:
1469 return s.encode(self.get_encoding())
1470 except UnicodeEncodeError as err:
1471 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1472 raise
1473
1474 def get_encoding(self):
1475 encoding = self.params.get('encoding')
1476 if encoding is None:
1477 encoding = preferredencoding()
1478 return encoding