]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[compat] Add compat_urllib_request_Request
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28 import ctypes
29
30 from .compat import (
31 compat_cookiejar,
32 compat_expanduser,
33 compat_get_terminal_size,
34 compat_http_client,
35 compat_kwargs,
36 compat_str,
37 compat_tokenize_tokenize,
38 compat_urllib_error,
39 compat_urllib_request,
40 compat_urllib_request_DataHandler,
41 )
42 from .utils import (
43 ContentTooShortError,
44 date_from_str,
45 DateRange,
46 DEFAULT_OUTTMPL,
47 determine_ext,
48 DownloadError,
49 encodeFilename,
50 ExtractorError,
51 format_bytes,
52 formatSeconds,
53 locked_file,
54 make_HTTPS_handler,
55 MaxDownloadsReached,
56 PagedList,
57 parse_filesize,
58 PerRequestProxyHandler,
59 PostProcessingError,
60 platform_name,
61 preferredencoding,
62 render_table,
63 SameFileError,
64 sanitize_filename,
65 sanitize_path,
66 std_headers,
67 subtitles_filename,
68 UnavailableVideoError,
69 url_basename,
70 version_tuple,
71 write_json_file,
72 write_string,
73 YoutubeDLCookieProcessor,
74 YoutubeDLHandler,
75 prepend_extension,
76 replace_extension,
77 args_to_str,
78 age_restricted,
79 )
80 from .cache import Cache
81 from .extractor import get_info_extractor, gen_extractors
82 from .downloader import get_suitable_downloader
83 from .downloader.rtmp import rtmpdump_version
84 from .postprocessor import (
85 FFmpegFixupM4aPP,
86 FFmpegFixupStretchedPP,
87 FFmpegMergerPP,
88 FFmpegPostProcessor,
89 get_postprocessor,
90 )
91 from .version import __version__
92
93
94 class YoutubeDL(object):
95 """YoutubeDL class.
96
97 YoutubeDL objects are the ones responsible for downloading the
98 actual video file and writing it to disk if the user has requested
99 it, among some other tasks. In most cases there should be one per
100 program. As, given a video URL, the downloader doesn't know how to
101 extract all the needed information, task that InfoExtractors do, it
102 has to pass the URL to one of them.
103
104 For this, YoutubeDL objects have a method that allows
105 InfoExtractors to be registered in a given order. When it is passed
106 a URL, the YoutubeDL object hands it to the first InfoExtractor it
107 finds that reports being able to handle it. The InfoExtractor extracts
108 all the information about the video or videos the URL refers to, and
109 YoutubeDL process the extracted information, possibly using a File
110 Downloader to download the video.
111
112 YoutubeDL objects accept a lot of parameters. In order not to saturate
113 the object constructor with arguments, it receives a dictionary of
114 options instead. These options are available through the params
115 attribute for the InfoExtractors to use. The YoutubeDL also
116 registers itself as the downloader in charge for the InfoExtractors
117 that are added to it, so this is a "mutual registration".
118
119 Available options:
120
121 username: Username for authentication purposes.
122 password: Password for authentication purposes.
123 videopassword: Password for accessing a video.
124 usenetrc: Use netrc for authentication instead.
125 verbose: Print additional info to stdout.
126 quiet: Do not print messages to stdout.
127 no_warnings: Do not print out anything for warnings.
128 forceurl: Force printing final URL.
129 forcetitle: Force printing title.
130 forceid: Force printing ID.
131 forcethumbnail: Force printing thumbnail URL.
132 forcedescription: Force printing description.
133 forcefilename: Force printing final filename.
134 forceduration: Force printing duration.
135 forcejson: Force printing info_dict as JSON.
136 dump_single_json: Force printing the info_dict of the whole playlist
137 (or video) as a single JSON line.
138 simulate: Do not download the video files.
139 format: Video format code. See options.py for more information.
140 outtmpl: Template for output names.
141 restrictfilenames: Do not allow "&" and spaces in file names
142 ignoreerrors: Do not stop on download errors.
143 force_generic_extractor: Force downloader to use the generic extractor
144 nooverwrites: Prevent overwriting files.
145 playliststart: Playlist item to start at.
146 playlistend: Playlist item to end at.
147 playlist_items: Specific indices of playlist to download.
148 playlistreverse: Download playlist items in reverse order.
149 matchtitle: Download only matching titles.
150 rejecttitle: Reject downloads for matching titles.
151 logger: Log messages to a logging.Logger instance.
152 logtostderr: Log messages to stderr instead of stdout.
153 writedescription: Write the video description to a .description file
154 writeinfojson: Write the video metadata to a .info.json file
155 writeannotations: Write the video annotations to a .annotations.xml file
156 writethumbnail: Write the thumbnail image to a file
157 write_all_thumbnails: Write all thumbnail formats to files
158 writesubtitles: Write the video subtitles to a file
159 writeautomaticsub: Write the automatically generated subtitles to a file
160 allsubtitles: Downloads all the subtitles of the video
161 (requires writesubtitles or writeautomaticsub)
162 listsubtitles: Lists all available subtitles for the video
163 subtitlesformat: The format code for subtitles
164 subtitleslangs: List of languages of the subtitles to download
165 keepvideo: Keep the video file after post-processing
166 daterange: A DateRange object, download only if the upload_date is in the range.
167 skip_download: Skip the actual download of the video file
168 cachedir: Location of the cache files in the filesystem.
169 False to disable filesystem cache.
170 noplaylist: Download single video instead of a playlist if in doubt.
171 age_limit: An integer representing the user's age in years.
172 Unsuitable videos for the given age are skipped.
173 min_views: An integer representing the minimum view count the video
174 must have in order to not be skipped.
175 Videos without view count information are always
176 downloaded. None for no limit.
177 max_views: An integer representing the maximum view count.
178 Videos that are more popular than that are not
179 downloaded.
180 Videos without view count information are always
181 downloaded. None for no limit.
182 download_archive: File name of a file where all downloads are recorded.
183 Videos already present in the file are not downloaded
184 again.
185 cookiefile: File name where cookies should be read from and dumped to.
186 nocheckcertificate:Do not verify SSL certificates
187 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
188 At the moment, this is only supported by YouTube.
189 proxy: URL of the proxy server to use
190 cn_verification_proxy: URL of the proxy to use for IP address verification
191 on Chinese sites. (Experimental)
192 socket_timeout: Time to wait for unresponsive hosts, in seconds
193 bidi_workaround: Work around buggy terminals without bidirectional text
194 support, using fribidi
195 debug_printtraffic:Print out sent and received HTTP traffic
196 include_ads: Download ads as well
197 default_search: Prepend this string if an input url is not valid.
198 'auto' for elaborate guessing
199 encoding: Use this encoding instead of the system-specified.
200 extract_flat: Do not resolve URLs, return the immediate result.
201 Pass in 'in_playlist' to only show this behavior for
202 playlist items.
203 postprocessors: A list of dictionaries, each with an entry
204 * key: The name of the postprocessor. See
205 youtube_dl/postprocessor/__init__.py for a list.
206 as well as any further keyword arguments for the
207 postprocessor.
208 progress_hooks: A list of functions that get called on download
209 progress, with a dictionary with the entries
210 * status: One of "downloading", "error", or "finished".
211 Check this first and ignore unknown values.
212
213 If status is one of "downloading", or "finished", the
214 following properties may also be present:
215 * filename: The final filename (always present)
216 * tmpfilename: The filename we're currently writing to
217 * downloaded_bytes: Bytes on disk
218 * total_bytes: Size of the whole file, None if unknown
219 * total_bytes_estimate: Guess of the eventual file size,
220 None if unavailable.
221 * elapsed: The number of seconds since download started.
222 * eta: The estimated time in seconds, None if unknown
223 * speed: The download speed in bytes/second, None if
224 unknown
225 * fragment_index: The counter of the currently
226 downloaded video fragment.
227 * fragment_count: The number of fragments (= individual
228 files that will be merged)
229
230 Progress hooks are guaranteed to be called at least once
231 (with status "finished") if the download is successful.
232 merge_output_format: Extension to use when merging formats.
233 fixup: Automatically correct known faults of the file.
234 One of:
235 - "never": do nothing
236 - "warn": only emit a warning
237 - "detect_or_warn": check whether we can do anything
238 about it, warn otherwise (default)
239 source_address: (Experimental) Client-side IP address to bind to.
240 call_home: Boolean, true iff we are allowed to contact the
241 youtube-dl servers for debugging.
242 sleep_interval: Number of seconds to sleep before each download.
243 listformats: Print an overview of available video formats and exit.
244 list_thumbnails: Print a table of all thumbnails and exit.
245 match_filter: A function that gets called with the info_dict of
246 every video.
247 If it returns a message, the video is ignored.
248 If it returns None, the video is downloaded.
249 match_filter_func in utils.py is one example for this.
250 no_color: Do not emit color codes in output.
251
252 The following options determine which downloader is picked:
253 external_downloader: Executable of the external downloader to call.
254 None or unset for standard (built-in) downloader.
255 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
256
257 The following parameters are not used by YoutubeDL itself, they are used by
258 the downloader (see youtube_dl/downloader/common.py):
259 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
260 noresizebuffer, retries, continuedl, noprogress, consoletitle,
261 xattr_set_filesize, external_downloader_args.
262
263 The following options are used by the post processors:
264 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
265 otherwise prefer avconv.
266 postprocessor_args: A list of additional command-line arguments for the
267 postprocessor.
268 """
269
270 params = None
271 _ies = []
272 _pps = []
273 _download_retcode = None
274 _num_downloads = None
275 _screen_file = None
276
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the options dictionary described in the class docstring
    (None is treated as an empty dictionary).  When auto_init is true the
    debug header is printed and the default info extractors are
    registered.  Post processors and progress hooks listed in params are
    always registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Select by truthiness rather than by list index so that any truthy
    # value (not only True/1) is accepted for 'logtostderr'.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv and fall back to fribidi when it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # ENOENT: the executable could not be found; anything else is unexpected
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Copy before dropping 'key' so the caller's dictionary is left intact
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
353
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a video ID that starts with a dash.

    Such an argument (a dash followed by exactly ten ID characters) is
    indistinguishable from an option, so a corrected command line that
    moves it behind '--' is suggested through report_warning.
    """
    # short YouTube ID starting with dash?
    suspicious = []
    regular = []
    for arg in argv:
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            suspicious.append(arg)
        else:
            regular.append(arg)
    if not suspicious:
        return
    correct_argv = ['youtube-dl'] + regular + ['--'] + suspicious
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
369
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and set this object as its downloader."""
    extractors = self._ies
    extractors.append(ie)
    by_key = self._ies_instances
    by_key[ie.ie_key()] = ie
    ie.set_downloader(self)
375
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.  When none has been
    registered yet, a new instance is created, added to the extractor
    list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
387
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are produced.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
394
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and set this object as
    its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
399
def add_progress_hook(self, ph):
    """Register the progress hook ph (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
403
404 def _bidi_workaround(self, message):
405 if not hasattr(self, '_output_channel'):
406 return message
407
408 assert hasattr(self, '_output_process')
409 assert isinstance(message, compat_str)
410 line_count = message.count('\n') + 1
411 self._output_process.stdin.write((message + '\n').encode('utf-8'))
412 self._output_process.stdin.flush()
413 res = ''.join(self._output_channel.readline().decode('utf-8')
414 for _ in range(line_count))
415 return res[:-len('\n')]
416
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout with the quiet check enabled."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
420
def _write_string(self, s, out=None):
    """Write s to out through write_string, using the 'encoding' parameter."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
423
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Emit message on the screen file, or hand it to the logger.

    When a 'logger' parameter is set, the message goes to its debug()
    method and nothing is written.  Otherwise the message is written to
    the screen file unless check_quiet is true and the 'quiet' parameter
    is set.  A newline is appended unless skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    processed = self._bidi_workaround(message)
    line_end = ('\n', '')[skip_eol]
    self._write_string(processed + line_end, self._screen_file)
434
def to_stderr(self, message):
    """Send message to the logger's error() method when a 'logger'
    parameter is set, otherwise write it plus a newline to the error file."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    processed = self._bidi_workaround(message)
    self._write_string(processed + '\n', self._err_file)
444
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    On Windows with a console window SetConsoleTitleW is used; otherwise,
    if TERM is set in the environment, a title escape sequence is written
    to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
454
def save_console_title(self):
    """Write the escape sequence that saves the terminal title, provided
    'consoletitle' is enabled and TERM is set in the environment."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
461
def restore_console_title(self):
    """Write the escape sequence that restores the terminal title, provided
    'consoletitle' is enabled and TERM is set in the environment."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
468
469 def __enter__(self):
470 self.save_console_title()
471 return self
472
473 def __exit__(self, *args):
474 self.restore_console_title()
475
476 if self.params.get('cookiefile') is not None:
477 self.cookiejar.save()
478
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first.  In verbose mode a
    traceback is printed as well: tb when given, otherwise one built from
    the exception being handled or, outside an except block, from the
    current stack.  Unless the 'ignoreerrors' parameter is set a
    DownloadError is raised; otherwise the download return code is set
    to 1.
    """
    def nested_exc_info():
        """Return the exc_info stored on the exception being handled, or None."""
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            return current[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                inner = nested_exc_info()
                if inner is not None:
                    parts.append(''.join(traceback.format_exception(*inner)))
                parts.append(compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = nested_exc_info()
        if exc_info is None:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
508
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' parameter the message goes to its
    warning() method.  Otherwise, unless 'no_warnings' is set, it is
    printed to stderr prefixed with 'WARNING:', colored when stderr is a
    tty, colors are not disabled and the OS is not Windows.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
525
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed by 'ERROR:'.  The prefix is
    colored in red when stderr is a tty, colors are not disabled and the
    OS is not Windows.
    '''
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
537
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded, falling
    back to a message without the name on UnicodeEncodeError."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        fallback = '[download] The file has already been downloaded'
        self.to_screen(fallback)
544
def prepare_filename(self, info_dict):
    """Generate the output filename for info_dict.

    The 'outtmpl' parameter (DEFAULT_OUTTMPL when unset) is filled in
    with a sanitized copy of info_dict plus the computed fields 'epoch',
    'autonumber', a zero-padded 'playlist_index' and, when missing,
    'resolution'.  Fields that are absent or None render as 'NA'.

    Returns the sanitized path, or None after reporting the error when
    the template raises a ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Pad the index to the number of digits of the playlist size
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Only the width is known: it goes on the left of the 'x'
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Unknown template fields are rendered as 'NA' instead of failing
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
587
588 def _match_entry(self, info_dict, incomplete):
589 """ Returns None iff the file should be downloaded """
590
591 video_title = info_dict.get('title', info_dict.get('id', 'video'))
592 if 'title' in info_dict:
593 # This can happen when we're just evaluating the playlist
594 title = info_dict['title']
595 matchtitle = self.params.get('matchtitle', False)
596 if matchtitle:
597 if not re.search(matchtitle, title, re.IGNORECASE):
598 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
599 rejecttitle = self.params.get('rejecttitle', False)
600 if rejecttitle:
601 if re.search(rejecttitle, title, re.IGNORECASE):
602 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
603 date = info_dict.get('upload_date', None)
604 if date is not None:
605 dateRange = self.params.get('daterange', DateRange())
606 if date not in dateRange:
607 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
608 view_count = info_dict.get('view_count', None)
609 if view_count is not None:
610 min_views = self.params.get('min_views')
611 if min_views is not None and view_count < min_views:
612 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
613 max_views = self.params.get('max_views')
614 if max_views is not None and view_count > max_views:
615 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
616 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
617 return 'Skipping "%s" because it is age restricted' % video_title
618 if self.in_download_archive(info_dict):
619 return '%s has already been recorded in archive' % video_title
620
621 if not incomplete:
622 match_filter = self.params.get('match_filter')
623 if match_filter is not None:
624 ret = match_filter(info_dict)
625 if ret is not None:
626 return ret
627
628 return None
629
630 @staticmethod
631 def add_extra_info(info_dict, extra_info):
632 '''Set the keys from extra_info in info dict if they are missing'''
633 for key, value in extra_info.items():
634 info_dict.setdefault(key, value)
635
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    url -- the URL to extract
    download -- passed on to process_ie_result; if true the videos are
                downloaded as well
    ie_key -- use only the extractor with this key instead of trying all
              registered extractors
    extra_info -- dict with extra values to add to each result
                  (None means no extra values)
    process -- if false, return the raw extractor result without
               resolving it through process_ie_result
    force_generic_extractor -- use the 'Generic' extractor when no ie_key
                               is given

    Returns the processed result (or the raw extractor result when
    process is false).  Returns None when no extractor is suitable, the
    extractor returned None, or an error was reported without raising.
    '''
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor claiming the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
688
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor name and key plus the page URL and its
    basename on ie_result, for the keys it does not already have."""
    defaults = {}
    defaults['extractor'] = ie.IE_NAME
    defaults['webpage_url'] = url
    defaults['webpage_url_basename'] = url_basename(url)
    defaults['extractor_key'] = ie.ie_key()
    self.add_extra_info(ie_result, defaults)
696
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    ie_result -- the extractor result; its '_type' ('video' by default)
                 selects how it is resolved
    download -- if true, the videos are downloaded as well
    extra_info -- dict with extra values added to the resolved results
                  (None means no extra values)

    Returns the resolved ie_result.  Returns None when a 'url_transparent'
    result points at a URL whose extraction produced nothing.  Raises
    Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when extraction failed and the error
        # was only reported; return early instead of crashing on None
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range, others are single items
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def select_playlistitems(all_entries):
            # Pick the requested 1-based items, ignoring out-of-range indices
            total = len(all_entries)
            return [
                all_entries[i - 1] for i in playlistitems
                if -total <= i - 1 < total]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = select_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                # Same bounds check as for lists, so a too-large item
                # number is skipped instead of raising IndexError
                entries = select_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each legacy entry the extractor details of its parent result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
861
def _build_format_filter(self, filter_spec):
    """Compile a single filter expression into a predicate over format dicts.

    Two kinds of expressions are understood: numeric comparisons on
    width/height/tbr/abr/vbr/asr/filesize/fps and (in)equality tests on
    ext/acodec/vcodec/container/protocol.  A '?' right after the operator
    makes the predicate return a truthy value for formats lacking the field.

    Raises ValueError when the expression (or its numeric value) cannot be
    parsed.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    match = numeric_rex.search(filter_spec)

    if match:
        raw_value = match.group('value')
        try:
            wanted = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a size, first verbatim and then
            # with an explicit 'B' suffix appended
            wanted = parse_filesize(raw_value)
            if wanted is None:
                wanted = parse_filesize(raw_value + 'B')
            if wanted is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = numeric_ops[match.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '!=': operator.ne,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>ext|acodec|vcodec|container|protocol)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9_-]+)
            \s*$
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        match = string_rex.search(filter_spec)
        if not match:
            raise ValueError('Invalid filter specification %r' % filter_spec)
        wanted = match.group('value')
        compare = string_ops[match.group('op')]

    field = match.group('key')
    # Matched '?' text (truthy) or None; returned as-is for missing fields
    result_when_missing = match.group('none_inclusive')

    def _matches(fmt):
        actual = fmt.get(field)
        if actual is None:
            return result_when_missing
        return compare(actual, wanted)
    return _matches
918
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    format_spec is split with the Python tokenizer, parsed into nested
    FormatSelector tuples and then turned into a callable.

    Returns a function that takes an iterable of format dicts and returns
    an iterable (a list or a generator, depending on the selector type) of
    the chosen format dicts.

    Raises SyntaxError (built by syntax_error below) when the specification
    is malformed.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError showing the spec with a caret
        # placed under column start[1]
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Selector node kinds stored in FormatSelector.type
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Consume tokens up to and including the closing ']' and return the
        # concatenated text in between. Falls off the end (returning None)
        # if the tokens run out before a ']' is seen.
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # NOTE: last_line is never reassigned below, so joined tokens are
        # always emitted with None as their line field
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (this inner loop advances the same `tokens` iterator as the
                # outer loop, so the bracket contents are passed through once)
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                # Flush any pending joined name before the structural operator
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Accumulate adjacent names/numbers/unused operators into one name
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser returning a list of FormatSelector tuples.
        # The inside_* flags tell a nested call which operators terminate it;
        # such operators are pushed back (restore_last_token) for the caller.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    # second_choice is a list of selectors (the rest of the alternatives)
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A bare filter such as '[height<=480]' applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a callable
        # mapping a list of formats to the selected formats.
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                # Concatenate the results of every selector in the list
                for f in fs:
                    for format in f(formats):
                        yield format
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                # Return the result of the first alternative that selects anything
                for f in fs:
                    picked_formats = list(f(formats))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            format_spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if not formats:
                    return
                if format_spec == 'all':
                    for f in formats:
                        yield f
                elif format_spec in ['best', 'worst', None]:
                    # 'worst' takes the first entry, 'best' the last one
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # Anything else is either a known extension or a format_id;
                    # the last matching format is selected
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                    else:
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                # Combine a (video, audio) pair into one synthetic format dict.
                # Returns None (after report_error) when the first format has
                # no video stream.
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                # Yield one merged format per (video, audio) combination
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Apply the bracket filters first, then the selector itself
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    # The tokenizer works on a bytes readline callable
    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # List-backed iterator that lets the parser push back the last token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        # Python 2 iterator protocol name
        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
1175
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    (when present and non-empty) and finally sets 'Cookie' when
    _calc_cookies() yields a non-empty value.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_value = self._calc_cookies(info_dict)
    if cookie_value:
        headers['Cookie'] = cookie_value

    return headers
1188
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar adds for info_dict['url'].

    A throwaway request for the URL is handed to the cookie jar and the
    resulting header is read back from it.
    """
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
1193
def process_video_result(self, info_dict, download=True):
    """Normalize a 'video' result, select its formats and optionally download.

    info_dict is modified in place: playlist fields, thumbnails, display_id,
    upload_date, subtitle extensions, requested_subtitles and per-format
    fields (format_id, format, ext, http_headers) are filled in.

    Parameters:
    info_dict -- extractor result whose '_type' is 'video' (or absent)
    download  -- when true, process_info() is called for each selected format

    Returns info_dict updated with the last selected format, or None when
    one of the listing options (listsubtitles, listformats, list_thumbnails)
    is set.

    Raises ExtractorError when 'id' or 'title' is missing, when there are no
    formats, when a format lacks 'url', or when the requested format is not
    available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Missing fields are replaced by a lowest-sorting placeholder: on
        # Python 3 None cannot be ordered against int/str, so sorting raw
        # .get() results raises TypeError when only some thumbnails carry
        # a given field
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url') if t.get('url') is not None else ''))
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # The list is sorted ascending, so the last entry is the preferred one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    subtitles = info_dict.get('subtitles')
    if subtitles:
        for _, subtitle in subtitles.items():
            for subtitle_format in subtitle:
                if 'ext' not in subtitle_format:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if 'url' not in format:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # Default spec: prefer merging separate streams for the listed
        # extractors when a usable merger exists, otherwise fall back to 'best'
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted'] and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1341
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Returns a dict mapping each requested language to the chosen subtitle
    format dict, or None when no subtitle option is enabled or nothing is
    available.  Languages that are requested but unavailable, and format
    preferences that cannot be satisfied, are reported via report_warning.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and want_normal:
        candidates.update(normal_subtitles)
    if automatic_captions and want_automatic:
        # Regular subtitles take precedence over automatic captions
        for lang, cap_info in automatic_captions.items():
            candidates.setdefault(lang, cap_info)

    if not (want_normal or want_automatic) or not candidates:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = candidates.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates:
        requested_langs = ['en']
    else:
        requested_langs = [list(candidates.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    if formats_query:
        preferred_exts = formats_query.split('/')
    else:
        preferred_exts = []

    selected = {}
    for lang in requested_langs:
        lang_formats = candidates.get(lang)
        if lang_formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for wanted_ext in preferred_exts:
            if wanted_ext == 'best':
                chosen = lang_formats[-1]
                break
            same_ext = [sub for sub in lang_formats if sub['ext'] == wanted_ext]
            if same_ext:
                chosen = same_ext[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            chosen = lang_formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        selected[lang] = chosen
    return selected
1390
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Handles, in order: the max_downloads limit, title truncation, entry
    matching, the forced printings, creation of the target directory,
    writing of the description/annotations/subtitles/info-json/thumbnail
    side files, the download itself (one file, or several requested formats
    that are merged afterwards), fixup postprocessors, post-processing and
    recording in the download archive.

    Returns None in every case; failures are reported through report_error
    and end processing early.

    Raises MaxDownloadsReached when the max_downloads limit is already hit,
    and UnavailableVideoError when the download fails with OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around before shortening it
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing key or a non-text value means nothing to write
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            if sub_info.get('data') is not None:
                sub_data = sub_info['data']
            else:
                try:
                    sub_data = ie._download_webpage(
                        sub_info['url'], info_dict['id'], note=False)
                except ExtractorError as err:
                    self.report_warning('Unable to download subtitle for "%s": %s' %
                                        (sub_lang, compat_str(err.cause)))
                    continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub_data)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Download `info` to `name` with the downloader suited to it
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # True when both extensions belong to the same
                    # compatibility group
                    video, audio = formats
                    # Check extension
                    video_ext, audio_ext = video.get('ext'), audio.get('ext')
                    if video_ext and audio_ext:
                        # Every group must be a tuple: a bare ('webm') is
                        # just a string and would turn the membership test
                        # below into a substring check
                        COMPATIBLE_EXTS = (
                            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                            ('webm',),
                        )
                        for exts in COMPATIBLE_EXTS:
                            if video_ext in exts and audio_ext in exts:
                                return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        # Each part gets an 'f<format_id>' infix in its name
                        fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
1654
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to an output
    template without any '%' placeholder (unless max_downloads is 1).
    Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1 and '%' not in template:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info also performs the actual download
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1678
def download_with_info_file(self, info_filename):
    """Process a result previously saved as JSON in info_filename.

    The file is read as UTF-8, stripped of the requested_* keys via
    filter_requested_info() and handed to process_ie_result().  When that
    raises DownloadError and the info carries a 'webpage_url', the URL is
    downloaded from scratch instead; otherwise the error propagates.
    Returns self._download_retcode (or the result of download()).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput has no read() method, so json.load cannot be used directly
        raw_json = '\n'.join(lines)
    info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1695
1696 @staticmethod
1697 def filter_requested_info(info_dict):
1698 return dict(
1699 (k, v) for k, v in info_dict.items()
1700 if k not in ['requested_formats', 'requested_subtitles'])
1701
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain consists of ie_info['__postprocessors'] (when set) followed
    by self._pps.  Each postprocessor receives the info dict returned by
    the previous one; files it reports for deletion are removed unless the
    'keepvideo' param is set.
    """
    current_info = dict(ie_info)
    current_info['filepath'] = filename

    entry_pps = ie_info.get('__postprocessors')
    chain = list(entry_pps) if entry_pps is not None else []
    chain += self._pps

    for postprocessor in chain:
        stale_files = []
        try:
            stale_files, current_info = postprocessor.run(current_info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if not stale_files or self.params.get('keepvideo', False):
            continue
        for stale in stale_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % stale)
            try:
                os.remove(encodeFilename(stale))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
1723
1724 def _make_archive_id(self, info_dict):
1725 # Future-proof against any change in case
1726 # and backwards compatibility with prior versions
1727 extractor = info_dict.get('extractor_key')
1728 if extractor is None:
1729 if 'id' in info_dict:
1730 extractor = info_dict.get('ie_key') # key in a playlist
1731 if extractor is None:
1732 return None # Incomplete video information
1733 return extractor.lower() + ' ' + info_dict['id']
1734
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' param is set, when no
    archive id can be built for info_dict, or when the archive file
    does not exist.  Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        # Incomplete video information
        return False

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == archive_id for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1753
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
1762
1763 @staticmethod
1764 def format_resolution(format, default='unknown'):
1765 if format.get('vcodec') == 'none':
1766 return 'audio only'
1767 if format.get('resolution') is not None:
1768 return format['resolution']
1769 if format.get('height') is not None:
1770 if format.get('width') is not None:
1771 res = '%sx%s' % (format['width'], format['height'])
1772 else:
1773 res = '%sp' % format['height']
1774 elif format.get('width') is not None:
1775 res = '?x%d' % format['width']
1776 else:
1777 res = default
1778 return res
1779
1780 def _format_note(self, fdict):
1781 res = ''
1782 if fdict.get('ext') in ['f4f', 'f4m']:
1783 res += '(unsupported) '
1784 if fdict.get('format_note') is not None:
1785 res += fdict['format_note'] + ' '
1786 if fdict.get('tbr') is not None:
1787 res += '%4dk ' % fdict['tbr']
1788 if fdict.get('container') is not None:
1789 if res:
1790 res += ', '
1791 res += '%s container' % fdict['container']
1792 if (fdict.get('vcodec') is not None and
1793 fdict.get('vcodec') != 'none'):
1794 if res:
1795 res += ', '
1796 res += fdict['vcodec']
1797 if fdict.get('vbr') is not None:
1798 res += '@'
1799 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1800 res += 'video@'
1801 if fdict.get('vbr') is not None:
1802 res += '%4dk' % fdict['vbr']
1803 if fdict.get('fps') is not None:
1804 res += ', %sfps' % fdict['fps']
1805 if fdict.get('acodec') is not None:
1806 if res:
1807 res += ', '
1808 if fdict['acodec'] == 'none':
1809 res += 'video only'
1810 else:
1811 res += '%-5s' % fdict['acodec']
1812 elif fdict.get('abr') is not None:
1813 if res:
1814 res += ', '
1815 res += 'audio'
1816 if fdict.get('abr') is not None:
1817 res += '@%3dk' % fdict['abr']
1818 if fdict.get('asr') is not None:
1819 res += ' (%5dHz)' % fdict['asr']
1820 if fdict.get('filesize') is not None:
1821 if res:
1822 res += ', '
1823 res += format_bytes(fdict['filesize'])
1824 elif fdict.get('filesize_approx') is not None:
1825 if res:
1826 res += ', '
1827 res += '~' + format_bytes(fdict['filesize_approx'])
1828 return res
1829
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Formats whose 'preference' is below -1000 are left out.  When the
    info dict has more than one format, the last listed row is tagged
    with '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # Every format may have been filtered out above; indexing an empty
    # table would raise IndexError, so only tag a row that exists
    if len(formats) > 1 and table:
        best_row = table[-1]
        best_row[-1] += (' ' if best_row[-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
1843
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for info_dict.

    Falls back to the single 'thumbnail' URL when there is no
    'thumbnails' list, and prints a notice when neither is available.
    """
    video_id = info_dict['id']
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % video_id)
            return
        thumbnails = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % video_id)
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
1860
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of subtitle languages and their available formats.

    name is the label used in the printed messages.  For each language
    the format extensions are listed in reverse order of the formats
    list.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = ', '.join(f['ext'] for f in reversed(formats))
        rows.append([lang, extensions])
    self.to_screen(render_table(['Language', 'formats'], rows))
1871
def urlopen(self, req):
    """Start an HTTP download.

    Opens req through self._opener using self._socket_timeout as the
    timeout and returns the resulting response object.
    """
    opener = self._opener
    return opener.open(req, timeout=self._socket_timeout)
1875
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; no-op unless 'verbose' is set.

    Reports encodings, the program version, the git HEAD (if it can be
    determined), the Python version, external executable versions and
    the proxy map.  With the 'call_home' param it also fetches and
    reports the public IP address and warns about an outdated version.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    missing_encoding = 'missing (%s)' % type(sys.stdout).__name__
    stdout_encoding = getattr(sys.stdout, 'encoding', missing_encoding)
    encoding_fields = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        stdout_encoding,
        self.get_encoding())
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encoding_fields,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        source_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=source_dir)
        out, err = git.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: the git revision is optional information
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    available = [
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v]
    exe_str = ', '.join(available)
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if not hasattr(handler, 'proxies'):
            continue
        proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return

    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1940
def _setup_opener(self):
    """Create the URL opener used for all requests.

    Sets self._socket_timeout (600 unless 'socket_timeout' is given),
    self.cookiejar (loaded from 'cookiefile' when that file is
    readable) and self._opener, built from the proxy, HTTPS, cookie,
    YoutubeDL and data handlers.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_option = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_option is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_option == '':
        # An explicitly empty proxy option gives an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_option, 'https': proxy_option}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 0
    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    data_handler = compat_urllib_request_DataHandler()
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, data_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
1981
def encode(self, s):
    """Return s as bytes, encoding it with get_encoding() when needed.

    Bytes input is returned unchanged.  A UnicodeEncodeError is
    re-raised with a hint about the encoding configuration appended to
    its reason.
    """
    if isinstance(s, bytes):
        # Already encoded
        return s

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
1991
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when unset."""
    configured = self.params.get('encoding')
    if configured is not None:
        return configured
    return preferredencoding()
1997
1998 def _write_thumbnails(self, info_dict, filename):
1999 if self.params.get('writethumbnail', False):
2000 thumbnails = info_dict.get('thumbnails')
2001 if thumbnails:
2002 thumbnails = [thumbnails[-1]]
2003 elif self.params.get('write_all_thumbnails', False):
2004 thumbnails = info_dict.get('thumbnails')
2005 else:
2006 return
2007
2008 if not thumbnails:
2009 # No thumbnails present, so return immediately
2010 return
2011
2012 for t in thumbnails:
2013 thumb_ext = determine_ext(t['url'], 'jpg')
2014 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2015 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2016 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2017
2018 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2019 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2020 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2021 else:
2022 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2023 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2024 try:
2025 uf = self.urlopen(t['url'])
2026 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2027 shutil.copyfileobj(uf, thumbf)
2028 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2029 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2030 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2031 self.report_warning('Unable to download thumbnail "%s": %s' %
2032 (t['url'], compat_str(err)))