]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[YoutubeDL] format spec: allow grouping specifiers with parentheses
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28 import ctypes
29
30 from .compat import (
31 compat_basestring,
32 compat_cookiejar,
33 compat_expanduser,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_str,
38 compat_tokenize_tokenize,
39 compat_urllib_error,
40 compat_urllib_request,
41 )
42 from .utils import (
43 escape_url,
44 ContentTooShortError,
45 date_from_str,
46 DateRange,
47 DEFAULT_OUTTMPL,
48 determine_ext,
49 DownloadError,
50 encodeFilename,
51 ExtractorError,
52 format_bytes,
53 formatSeconds,
54 HEADRequest,
55 locked_file,
56 make_HTTPS_handler,
57 MaxDownloadsReached,
58 PagedList,
59 parse_filesize,
60 PerRequestProxyHandler,
61 PostProcessingError,
62 platform_name,
63 preferredencoding,
64 render_table,
65 SameFileError,
66 sanitize_filename,
67 sanitize_path,
68 std_headers,
69 subtitles_filename,
70 UnavailableVideoError,
71 url_basename,
72 version_tuple,
73 write_json_file,
74 write_string,
75 YoutubeDLHandler,
76 prepend_extension,
77 replace_extension,
78 args_to_str,
79 age_restricted,
80 )
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
86 FFmpegFixupM4aPP,
87 FFmpegFixupStretchedPP,
88 FFmpegMergerPP,
89 FFmpegPostProcessor,
90 get_postprocessor,
91 )
92 from .version import __version__
93
94
95 class YoutubeDL(object):
96 """YoutubeDL class.
97
98 YoutubeDL objects are the ones responsible for downloading the
99 actual video file and writing it to disk if the user has requested
100 it, among some other tasks. In most cases there should be one per
101 program. As, given a video URL, the downloader doesn't know how to
102 extract all the needed information, task that InfoExtractors do, it
103 has to pass the URL to one of them.
104
105 For this, YoutubeDL objects have a method that allows
106 InfoExtractors to be registered in a given order. When it is passed
107 a URL, the YoutubeDL object hands it to the first InfoExtractor it
108 finds that reports being able to handle it. The InfoExtractor extracts
109 all the information about the video or videos the URL refers to, and
110 YoutubeDL processes the extracted information, possibly using a File
111 Downloader to download the video.
112
113 YoutubeDL objects accept a lot of parameters. In order not to saturate
114 the object constructor with arguments, it receives a dictionary of
115 options instead. These options are available through the params
116 attribute for the InfoExtractors to use. The YoutubeDL also
117 registers itself as the downloader in charge for the InfoExtractors
118 that are added to it, so this is a "mutual registration".
119
120 Available options:
121
122 username: Username for authentication purposes.
123 password: Password for authentication purposes.
124 videopassword: Password for accessing a video.
125 usenetrc: Use netrc for authentication instead.
126 verbose: Print additional info to stdout.
127 quiet: Do not print messages to stdout.
128 no_warnings: Do not print out anything for warnings.
129 forceurl: Force printing final URL.
130 forcetitle: Force printing title.
131 forceid: Force printing ID.
132 forcethumbnail: Force printing thumbnail URL.
133 forcedescription: Force printing description.
134 forcefilename: Force printing final filename.
135 forceduration: Force printing duration.
136 forcejson: Force printing info_dict as JSON.
137 dump_single_json: Force printing the info_dict of the whole playlist
138 (or video) as a single JSON line.
139 simulate: Do not download the video files.
140 format: Video format code. See options.py for more information.
141 outtmpl: Template for output names.
142 restrictfilenames: Do not allow "&" and spaces in file names
143 ignoreerrors: Do not stop on download errors.
144 force_generic_extractor: Force downloader to use the generic extractor
145 nooverwrites: Prevent overwriting files.
146 playliststart: Playlist item to start at.
147 playlistend: Playlist item to end at.
148 playlist_items: Specific indices of playlist to download.
149 playlistreverse: Download playlist items in reverse order.
150 matchtitle: Download only matching titles.
151 rejecttitle: Reject downloads for matching titles.
152 logger: Log messages to a logging.Logger instance.
153 logtostderr: Log messages to stderr instead of stdout.
154 writedescription: Write the video description to a .description file
155 writeinfojson: Write the video description to a .info.json file
156 writeannotations: Write the video annotations to a .annotations.xml file
157 writethumbnail: Write the thumbnail image to a file
158 write_all_thumbnails: Write all thumbnail formats to files
159 writesubtitles: Write the video subtitles to a file
160 writeautomaticsub: Write the automatic subtitles to a file
161 allsubtitles: Downloads all the subtitles of the video
162 (requires writesubtitles or writeautomaticsub)
163 listsubtitles: Lists all available subtitles for the video
164 subtitlesformat: The format code for subtitles
165 subtitleslangs: List of languages of the subtitles to download
166 keepvideo: Keep the video file after post-processing
167 daterange: A DateRange object, download only if the upload_date is in the range.
168 skip_download: Skip the actual download of the video file
169 cachedir: Location of the cache files in the filesystem.
170 False to disable filesystem cache.
171 noplaylist: Download single video instead of a playlist if in doubt.
172 age_limit: An integer representing the user's age in years.
173 Unsuitable videos for the given age are skipped.
174 min_views: An integer representing the minimum view count the video
175 must have in order to not be skipped.
176 Videos without view count information are always
177 downloaded. None for no limit.
178 max_views: An integer representing the maximum view count.
179 Videos that are more popular than that are not
180 downloaded.
181 Videos without view count information are always
182 downloaded. None for no limit.
183 download_archive: File name of a file where all downloads are recorded.
184 Videos already present in the file are not downloaded
185 again.
186 cookiefile: File name where cookies should be read from and dumped to.
187 nocheckcertificate:Do not verify SSL certificates
188 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
189 At the moment, this is only supported by YouTube.
190 proxy: URL of the proxy server to use
191 cn_verification_proxy: URL of the proxy to use for IP address verification
192 on Chinese sites. (Experimental)
193 socket_timeout: Time to wait for unresponsive hosts, in seconds
194 bidi_workaround: Work around buggy terminals without bidirectional text
195 support, using fribidi
196 debug_printtraffic:Print out sent and received HTTP traffic
197 include_ads: Download ads as well
198 default_search: Prepend this string if an input url is not valid.
199 'auto' for elaborate guessing
200 encoding: Use this encoding instead of the system-specified.
201 extract_flat: Do not resolve URLs, return the immediate result.
202 Pass in 'in_playlist' to only show this behavior for
203 playlist items.
204 postprocessors: A list of dictionaries, each with an entry
205 * key: The name of the postprocessor. See
206 youtube_dl/postprocessor/__init__.py for a list.
207 as well as any further keyword arguments for the
208 postprocessor.
209 progress_hooks: A list of functions that get called on download
210 progress, with a dictionary with the entries
211 * status: One of "downloading", "error", or "finished".
212 Check this first and ignore unknown values.
213
214 If status is one of "downloading", or "finished", the
215 following properties may also be present:
216 * filename: The final filename (always present)
217 * tmpfilename: The filename we're currently writing to
218 * downloaded_bytes: Bytes on disk
219 * total_bytes: Size of the whole file, None if unknown
220 * total_bytes_estimate: Guess of the eventual file size,
221 None if unavailable.
222 * elapsed: The number of seconds since download started.
223 * eta: The estimated time in seconds, None if unknown
224 * speed: The download speed in bytes/second, None if
225 unknown
226 * fragment_index: The counter of the currently
227 downloaded video fragment.
228 * fragment_count: The number of fragments (= individual
229 files that will be merged)
230
231 Progress hooks are guaranteed to be called at least once
232 (with status "finished") if the download is successful.
233 merge_output_format: Extension to use when merging formats.
234 fixup: Automatically correct known faults of the file.
235 One of:
236 - "never": do nothing
237 - "warn": only emit a warning
238 - "detect_or_warn": check whether we can do anything
239 about it, warn otherwise (default)
240 source_address: (Experimental) Client-side IP address to bind to.
241 call_home: Boolean, true iff we are allowed to contact the
242 youtube-dl servers for debugging.
243 sleep_interval: Number of seconds to sleep before each download.
244 listformats: Print an overview of available video formats and exit.
245 list_thumbnails: Print a table of all thumbnails and exit.
246 match_filter: A function that gets called with the info_dict of
247 every video.
248 If it returns a message, the video is ignored.
249 If it returns None, the video is downloaded.
250 match_filter_func in utils.py is one example for this.
251 no_color: Do not emit color codes in output.
252
253 The following options determine which downloader is picked:
254 external_downloader: Executable of the external downloader to call.
255 None or unset for standard (built-in) downloader.
256 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
257
258 The following parameters are not used by YoutubeDL itself, they are used by
259 the downloader (see youtube_dl/downloader/common.py):
260 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261 noresizebuffer, retries, continuedl, noprogress, consoletitle,
262 xattr_set_filesize, external_downloader_args.
263
264 The following options are used by the post processors:
265 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
266 otherwise prefer avconv.
267 """
268
269 params = None
270 _ies = []
271 _pps = []
272 _download_retcode = None
273 _num_downloads = None
274 _screen_file = None
275
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary (None is treated as an empty
    dictionary); it is stored as-is in self.params.
    If auto_init is true, the debug header is printed and the default
    info extractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Any truthy 'logtostderr' selects stderr. Indexing a two-element list
    # with the raw option value would fail for None or non-bool values.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # bidiv is tried first; fribidi is the fallback
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is downgraded to a warning
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Each postprocessor definition is a dict: 'key' names the class, the
    # remaining items are passed to its constructor as keyword arguments.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
348
def warn_if_short_id(self, argv):
    """Warn if an argument looks like a short YouTube ID starting with a dash.

    Such arguments are matched by the pattern below; when any are found,
    a warning is emitted that shows the command line with those
    arguments moved after a '--' separator.
    """
    short_id_rex = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    dashed_positions = [
        pos for pos, arg in enumerate(argv) if short_id_rex.match(arg)]
    if not dashed_positions:
        return

    regular_args = [
        arg for pos, arg in enumerate(argv) if pos not in dashed_positions]
    dashed_args = [argv[pos] for pos in dashed_positions]
    suggestion = ['youtube-dl'] + regular_args + ['--'] + dashed_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggestion))
364
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and set this object as its downloader."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
370
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If none has been
    registered yet, a new one is created, added to the extractor list
    and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    fresh = get_info_extractor(ie_key)()
    self.add_info_extractor(fresh)
    return fresh
382
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are produced.
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
389
def add_post_processor(self, pp):
    """Append pp to the postprocessor chain and set this object as its
    downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
394
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
398
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was set up.

    Without an _output_channel attribute the message is returned
    unchanged. Otherwise the message is written (UTF-8, newline
    terminated) to the helper's stdin and as many lines as the message
    contains are read back from the output channel; the final newline
    is stripped from the result.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    decoded_lines = []
    for _ in range(expected_lines):
        decoded_lines.append(self._output_channel.readline().decode('utf-8'))
    processed = ''.join(decoded_lines)
    return processed[:-len('\n')]
411
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout() with the quiet check enabled."""
    return self.to_stdout(message, skip_eol, check_quiet=True)
415
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' option."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
418
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' option is set the message goes to logger.debug() and
    nothing is written. Otherwise the message is written unless
    check_quiet is true and the 'quiet' option is set. A newline is
    appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    line_ending = ('\n', '')[skip_eol]
    text = self._bidi_workaround(message) + line_ending
    self._write_string(text, self._screen_file)
429
def to_stderr(self, message):
    """Print message to the error file, or hand it to logger.error()
    when a 'logger' option is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = self._bidi_workaround(message) + '\n'
    self._write_string(text, self._err_file)
439
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    On Windows with a console window the kernel32 API is used; otherwise,
    if TERM is set in the environment, an escape sequence is written to
    the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        wide_title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(wide_title)
        return
    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
449
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    title_enabled = self.params.get('consoletitle', False)
    if title_enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
456
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    title_enabled = self.params.get('consoletitle', False)
    if title_enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
463
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
467
def __exit__(self, *args):
    """Leave the context: restore the console title and, when a
    'cookiefile' option is set, save the cookie jar."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
473
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr. In verbose mode a
    traceback is printed as well: tb if given, otherwise one built from
    the exception being handled or from the current stack.
    Unless the 'ignoreerrors' option is set a DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    def wrapped_exc_info():
        # exc_info carried by the exception being handled, if it has one
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                inner = wrapped_exc_info()
                if inner:
                    tb += ''.join(traceback.format_exception(*inner))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = wrapped_exc_info() or sys.exc_info()
    raise DownloadError(message, exc_info)
503
def report_warning(self, message):
    '''
    Emit a warning. With a 'logger' option the message goes to
    logger.warning(); otherwise, unless 'no_warnings' is set, it is
    printed to stderr prefixed with 'WARNING:' (colored when stderr is
    a tty, colors are not disabled and the OS is not Windows).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    colorize = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
520
def report_error(self, message, tb=None):
    '''
    Prefix message with 'ERROR:' (in red when stderr is a tty, colors
    are not disabled and the OS is not Windows) and pass it, together
    with tb, to trouble().
    '''
    colorize = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
532
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded.

    Falls back to a message without the file name when printing it
    raises UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
539
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option (DEFAULT_OUTTMPL if unset) is %-formatted with
    a copy of info_dict extended by 'epoch', 'autonumber', a zero-padded
    'playlist_index' and, if missing, a 'resolution' derived from
    width/height. Values are sanitized for use in file names; missing or
    None fields are rendered as 'NA'.

    Returns the filename, or None (after reporting the error) when the
    template raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Pad the index to the width of the playlist's entry count
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Resolution reads WIDTHxHEIGHT, so the unknown height is
                # the part replaced by '?'
                template_dict['resolution'] = '%dx?' % template_dict['width']

        def sanitize(k, v):
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Fields the template asks for but the info lacks become 'NA'
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
582
def _match_entry(self, info_dict, incomplete):
    """ Returns None iff the file should be downloaded

    Otherwise the return value is the reason for skipping it. Checks, in
    order: title match/reject patterns, upload date range, view count
    limits, age restriction, the download archive and, unless incomplete
    is true, the 'match_filter' callable.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    upload_date = info_dict.get('upload_date', None)
    if upload_date is not None:
        allowed_dates = self.params.get('daterange', DateRange())
        if upload_date not in allowed_dates:
            return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    match_filter = self.params.get('match_filter')
    if match_filter is None:
        return None
    # A non-None result of the filter is the reason for skipping
    return match_filter(info_dict)
624
625 @staticmethod
626 def add_extra_info(info_dict, extra_info):
627 '''Set the keys from extra_info in info dict if they are missing'''
628 for key, value in extra_info.items():
629 info_dict.setdefault(key, value)
630
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable extractor.

    ie_key selects a specific extractor; force_generic_extractor selects
    the 'Generic' one when no ie_key is given.
    extra_info is a dict containing the extra values to add to each result
    (None means no extra values).

    Returns the result of process_ie_result() if 'process' is true (this
    also downloads the videos if 'download'), otherwise the raw extractor
    result. Returns None when the extractor returned None or when an
    error was reported without raising.
    '''
    # A fresh dict per call; a shared mutable default could leak values
    # between unrelated extractions if anything downstream modified it.
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
683
def add_default_extra_info(self, ie_result, ie, url):
    """Add the extractor name and key and the page URL (with its
    basename) to ie_result, without overwriting existing keys."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
691
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    extra_info is a dict of values added to video results when they are
    missing (None means no extra values).

    It will also download the videos if 'download'.
    Returns the resolved ie_result, or None when a referenced URL could
    not be extracted.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when the extraction failed and the
        # error was only reported; there is nothing to merge then.
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(items_spec):
                # 'a-b' segments expand to the inclusive range, other
                # segments are single 1-based indices
                for string_segment in items_spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = [
                    ie_entries[i - 1] for i in playlistitems
                    if -n_all_entries <= i - 1 < n_all_entries]
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                entry_list = list(ie_entries)
                n_all_entries = len(entry_list)
                # Skip out-of-range indices, as the list branch above
                # does, instead of failing with an IndexError
                entries = [
                    entry_list[i - 1] for i in playlistitems
                    if -n_all_entries <= i - 1 < n_all_entries]
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected here are skipped before being resolved
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
855
856 def _build_format_filter(self, filter_spec):
857 " Returns a function to filter the formats according to the filter_spec "
858
859 OPERATORS = {
860 '<': operator.lt,
861 '<=': operator.le,
862 '>': operator.gt,
863 '>=': operator.ge,
864 '=': operator.eq,
865 '!=': operator.ne,
866 }
867 operator_rex = re.compile(r'''(?x)\s*
868 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
869 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
870 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
871 $
872 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
873 m = operator_rex.search(filter_spec)
874 if m:
875 try:
876 comparison_value = int(m.group('value'))
877 except ValueError:
878 comparison_value = parse_filesize(m.group('value'))
879 if comparison_value is None:
880 comparison_value = parse_filesize(m.group('value') + 'B')
881 if comparison_value is None:
882 raise ValueError(
883 'Invalid value %r in format specification %r' % (
884 m.group('value'), filter_spec))
885 op = OPERATORS[m.group('op')]
886
887 if not m:
888 STR_OPERATORS = {
889 '=': operator.eq,
890 '!=': operator.ne,
891 }
892 str_operator_rex = re.compile(r'''(?x)
893 \s*(?P<key>ext|acodec|vcodec|container|protocol)
894 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
895 \s*(?P<value>[a-zA-Z0-9_-]+)
896 \s*$
897 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
898 m = str_operator_rex.search(filter_spec)
899 if m:
900 comparison_value = m.group('value')
901 op = STR_OPERATORS[m.group('op')]
902
903 if not m:
904 raise ValueError('Invalid filter specification %r' % filter_spec)
905
906 def _filter(f):
907 actual_value = f.get(m.group('key'))
908 if actual_value is None:
909 return m.group('none_inclusive')
910 return op(actual_value, comparison_value)
911 return _filter
912
def build_format_selector(self, format_spec):
    """Compile ``format_spec`` into a function that picks formats.

    The specification is split with Python's tokenizer and parsed into a
    tree of selectors:

    * a name or number selects a single format ('best', 'worst',
      'bestaudio', 'worstaudio', 'bestvideo', 'worstvideo', 'all', a
      known extension or a format_id);
    * ``a,b`` selects both, ``a/b`` selects the first alternative that
      yields anything, ``a+b`` merges a video and an audio format;
    * ``(...)`` groups selectors and ``[...]`` attaches a filter that is
      compiled with ``self._build_format_filter``.

    'best' picks the last element of the candidate list and 'worst' the
    first one, so the list is presumably ordered from worst to best
    (assumption, confirm with the extractors' sorting).

    Returns a function that takes the list of available format dicts and
    returns an iterable with the selected formats.

    Raises SyntaxError when the specification is malformed.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Collect the raw text of every token up to the closing bracket
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _parse_format_selection(tokens, endwith=()):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string in endwith:
                    break
                elif string == ')':
                    # ')' will be handled by the parentheses group
                    tokens.restore_last_token()
                    break
                if string == ',':
                    # Without this check a None would end up in the
                    # selector list and crash while building it
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, [','])
                    current_selector = None
                    selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
                elif string == '[':
                    if not current_selector:
                        # A bare filter applies to 'best'
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    current_selector = FormatSelector(GROUP, _parse_format_selection(tokens, [')']), [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, [','])
                    current_selector = None
                    selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), []))
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                for f in fs:
                    for fmt in f(formats):
                        yield fmt
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                for f in fs:
                    picked_formats = list(f(formats))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            single_spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                # Nothing to choose from (e.g. every format was filtered
                # out).  all() over an empty list is True, so without
                # this guard 'best'/'worst' would index an empty list.
                if not formats:
                    return
                if single_spec == 'all':
                    for f in formats:
                        yield f
                elif single_spec in ['best', 'worst', None]:
                    format_idx = 0 if single_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif single_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif single_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif single_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif single_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if single_spec in extensions:
                        matches = [f for f in formats if f['ext'] == single_spec]
                    else:
                        matches = [f for f in formats if f['format_id'] == single_spec]
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Filters attached to the selector narrow the candidates
            # before the selection itself runs
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(compat_tokenize_tokenize(stream.readline))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over the token list that can step back one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
1125
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for ``info_dict``.

    Starts from a copy of ``std_headers``, overlays the entry's own
    'http_headers' (if any) and adds a 'Cookie' header when
    ``self._calc_cookies`` returns one.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header
    return headers
1138
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar sets for info_dict['url'].

    A throwaway request for the URL is handed to
    ``self.cookiejar.add_cookie_header`` and its 'Cookie' header is read
    back.
    """
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
1143
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and download them.

    ``info_dict`` is modified in place: playlist fields, thumbnails,
    'display_id', 'upload_date', 'requested_subtitles' and the per-format
    'format_id', 'format', 'ext' and 'http_headers' fields are filled in.
    The formats are then chosen with the 'format' parameter and, when
    ``download`` is true, each chosen format is passed to
    ``self.process_info``.

    Returns ``info_dict`` updated with the last selected format, or None
    when only a listing ('listsubtitles', 'listformats',
    'list_thumbnails') was requested.

    Raises ExtractorError when 'id', 'title' or a format 'url' is
    missing, when there are no formats, or when the requested format is
    not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def _none_first(value):
            # Python 3 cannot order None against ints or strings; pairing
            # each value with a flag keeps missing values first without
            # ever comparing None to a real value.
            return (value is not None, value)

        thumbnails.sort(key=lambda t: tuple(
            _none_first(t.get(field))
            for field in ('preference', 'width', 'height', 'id', 'url')))
        for i, t in enumerate(thumbnails):
            # The keys may be present but set to None
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], info_dict.get('subtitles'),
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # Default: prefer separate best video + best audio when the
        # result can be merged, otherwise fall back to 'best'
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted']):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1283
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Regular subtitles are considered when 'writesubtitles' is set and
    automatic captions when 'writeautomaticsub' is set; regular subtitles
    win when both exist for a language.

    Returns a dict mapping each selected language to the chosen subtitle
    entry, or None when nothing was requested or nothing is available.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    candidates_by_lang = {}
    if normal_subtitles and want_normal:
        candidates_by_lang.update(normal_subtitles)
    if automatic_captions and want_automatic:
        for lang, cap_info in automatic_captions.items():
            candidates_by_lang.setdefault(lang, cap_info)

    if not (want_normal or want_automatic) or not candidates_by_lang:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = candidates_by_lang.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates_by_lang:
        requested_langs = ['en']
    else:
        requested_langs = [list(candidates_by_lang.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    if formats_query:
        formats_preference = formats_query.split('/')
    else:
        formats_preference = []

    selected = {}
    for lang in requested_langs:
        lang_formats = candidates_by_lang.get(lang)
        if lang_formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue

        found = False
        for wanted_ext in formats_preference:
            if wanted_ext == 'best':
                chosen = lang_formats[-1]
                found = True
                break
            same_ext = [sub for sub in lang_formats if sub['ext'] == wanted_ext]
            if same_ext:
                chosen = same_ext[-1]
                found = True
                break
        if not found:
            # No preference matched: fall back to the last entry
            chosen = lang_formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        selected[lang] = chosen
    return selected
1332
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Checks the download limit and the match filters, prints the fields
    requested through the ``force*`` parameters, writes the optional side
    files (description, annotations, subtitles, info JSON, thumbnails),
    downloads the media, schedules fixup postprocessors, runs the
    postprocessors and records the video in the download archive.

    Always returns None; most failures are reported through
    ``self.report_error`` / ``self.report_warning`` and end the
    processing of this entry.

    Raises MaxDownloadsReached when the configured limit has already been
    reached and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around before shortening it
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing or non-text 'annotations' entry
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            if sub_info.get('data') is not None:
                sub_data = sub_info['data']
            else:
                try:
                    sub_data = ie._download_webpage(
                        sub_info['url'], info_dict['id'], note=False)
                except ExtractorError as err:
                    self.report_warning('Unable to download subtitle for "%s": %s' %
                                        (sub_lang, compat_str(err.cause)))
                    continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub_data)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    video, audio = formats
                    # Check extension
                    video_ext, audio_ext = video.get('ext'), audio.get('ext')
                    if video_ext and audio_ext:
                        # Every group must be a tuple: a bare ('webm')
                        # is a string and would match by substring
                        COMPATIBLE_EXTS = (
                            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                            ('webm',),
                        )
                        for exts in COMPATIBLE_EXTS:
                            if video_ext in exts and audio_ext in exts:
                                return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
1596
def download(self, url_list):
    """Download a given list of URLs.

    Returns ``self._download_retcode``.  Raises SameFileError when several
    URLs would be written to one fixed output template, and re-raises
    MaxDownloadsReached after announcing it.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # A template without any '%' field would make every URL overwrite the
    # same file, unless at most one download is allowed anyway
    if len(url_list) > 1 and '%' not in template:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for current_url in url_list:
        try:
            # extract_info also performs the download
            result = self.extract_info(
                current_url,
                force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(result))

    return self._download_retcode
1620
def download_with_info_file(self, info_filename):
    """Process a result previously saved as a JSON info file.

    The file is read as UTF-8, stripped with
    ``self.filter_requested_info`` and handed to
    ``self.process_ie_result``.  When that raises DownloadError and the
    info has a 'webpage_url', the URL is downloaded from scratch instead;
    without one the error propagates.

    Returns ``self._download_retcode`` (or the result of
    ``self.download`` on the fallback path).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(raw_info)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1637
@staticmethod
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the per-request selection keys.

    'requested_formats' and 'requested_subtitles' are left out; every
    other key is copied as is (values are not copied).
    """
    dropped_keys = ['requested_formats', 'requested_subtitles']
    filtered = {}
    for key, value in info_dict.items():
        if key in dropped_keys:
            continue
        filtered[key] = value
    return filtered
1643
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed under '__postprocessors' in ``ie_info`` run
    first, followed by the ones in ``self._pps``.  Each one receives the
    info returned by the previous one.  Files a postprocessor reports as
    obsolete are removed unless the 'keepvideo' parameter is set.
    """
    current_info = dict(ie_info)
    current_info['filepath'] = filename

    entry_pps = ie_info.get('__postprocessors')
    chain = [] if entry_pps is None else list(entry_pps)
    chain.extend(self._pps)

    for postprocessor in chain:
        obsolete_files = []
        try:
            obsolete_files, current_info = postprocessor.run(current_info)
        except PostProcessingError as e:
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
1665
1666 def _make_archive_id(self, info_dict):
1667 # Future-proof against any change in case
1668 # and backwards compatibility with prior versions
1669 extractor = info_dict.get('extractor_key')
1670 if extractor is None:
1671 if 'id' in info_dict:
1672 extractor = info_dict.get('ie_key') # key in a playlist
1673 if extractor is None:
1674 return None # Incomplete video information
1675 return extractor.lower() + ' ' + info_dict['id']
1676
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no archive is configured, when no archive id can
    be built for ``info_dict`` or when the archive file does not exist.
    Other IOErrors while reading the archive propagate.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted_entry = self._make_archive_id(info_dict)
    if wanted_entry is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == wanted_entry for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1695
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive, if configured.

    Does nothing when the 'download_archive' parameter is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
1704
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution for a format dict.

    In order of precedence: 'audio only' when the format has no video
    codec, the explicit 'resolution' field, 'WIDTHxHEIGHT', 'HEIGHTp'
    when only the height is known, 'WIDTHx?' when only the width is
    known, and ``default`` otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The width goes on the left of the 'x'; the unknown height is
        # the part that becomes '?'
        return '%dx?' % width
    return default
1721
1722 def _format_note(self, fdict):
1723 res = ''
1724 if fdict.get('ext') in ['f4f', 'f4m']:
1725 res += '(unsupported) '
1726 if fdict.get('format_note') is not None:
1727 res += fdict['format_note'] + ' '
1728 if fdict.get('tbr') is not None:
1729 res += '%4dk ' % fdict['tbr']
1730 if fdict.get('container') is not None:
1731 if res:
1732 res += ', '
1733 res += '%s container' % fdict['container']
1734 if (fdict.get('vcodec') is not None and
1735 fdict.get('vcodec') != 'none'):
1736 if res:
1737 res += ', '
1738 res += fdict['vcodec']
1739 if fdict.get('vbr') is not None:
1740 res += '@'
1741 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1742 res += 'video@'
1743 if fdict.get('vbr') is not None:
1744 res += '%4dk' % fdict['vbr']
1745 if fdict.get('fps') is not None:
1746 res += ', %sfps' % fdict['fps']
1747 if fdict.get('acodec') is not None:
1748 if res:
1749 res += ', '
1750 if fdict['acodec'] == 'none':
1751 res += 'video only'
1752 else:
1753 res += '%-5s' % fdict['acodec']
1754 elif fdict.get('abr') is not None:
1755 if res:
1756 res += ', '
1757 res += 'audio'
1758 if fdict.get('abr') is not None:
1759 res += '@%3dk' % fdict['abr']
1760 if fdict.get('asr') is not None:
1761 res += ' (%5dHz)' % fdict['asr']
1762 if fdict.get('filesize') is not None:
1763 if res:
1764 res += ', '
1765 res += format_bytes(fdict['filesize'])
1766 elif fdict.get('filesize_approx') is not None:
1767 if res:
1768 res += ', '
1769 res += '~' + format_bytes(fdict['filesize_approx'])
1770 return res
1771
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    Uses info_dict['formats'] when present, otherwise treats info_dict
    itself as the only format. Formats whose 'preference' is below -1000
    are left out of the table. When more than one format exists, the last
    listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter may have removed every row; only tag a row
    # that actually exists instead of failing on table[-1].
    if len(formats) > 1 and table:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
1785
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for a video.

    Falls back to the single 'thumbnail' URL when there is no
    'thumbnails' list, and prints a notice when neither is available.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        # Present the lone thumbnail URL as a one-entry list
        thumbnails = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = [
        [t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
        for t in thumbnails]
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
1802
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of subtitle languages and their available formats.

    subtitles maps a language to a list of format dicts; the 'ext' values
    of each list are shown in reverse order. ``name`` is the label used
    in the printed messages.
    """
    if subtitles:
        self.to_screen(
            'Available %s for %s:' % (name, video_id))
        rows = []
        for lang, formats in subtitles.items():
            extensions = [f['ext'] for f in reversed(formats)]
            rows.append([lang, ', '.join(extensions)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
1813
def urlopen(self, req):
    """Open req (a URL string or a request object) with this instance's opener."""

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    if isinstance(req, compat_basestring):
        escaped = escape_url(req)
        if escaped != req:
            req = escaped
    else:
        full_url = req.get_full_url()
        escaped = escape_url(full_url)
        if escaped != full_url:
            # Rebuild the request around the escaped URL, keeping its
            # HTTP method, payload and headers
            if req.get_method() == 'HEAD':
                request_class = HEADRequest
            else:
                request_class = compat_urllib_request.Request
            req = request_class(
                escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
1838
def print_debug_header(self):
    """Write the '[debug]' diagnostic header when the 'verbose' param is set.

    Reports, in order: the encodings in use, the program version, the git
    HEAD of the directory containing this file (best effort), the Python
    version and platform, the versions of external executables, and the
    proxy map of the opener. With the 'call_home' param it additionally
    fetches and prints the public IP address and warns when a newer
    version is available.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # Fall back to a placeholder naming the stream type when sys.stdout
    # has no 'encoding' attribute
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    # Best effort: report the git revision when running from a checkout.
    # Any failure here (git missing, not a repository, ...) is ignored.
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Only print output that starts with hex digits (looks like a hash)
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # sys.exc_clear() only exists on Python 2, hence the inner guard
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    # List only the executables for which a (truthy) version was found
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the proxies of every opener handler that exposes a 'proxies' attribute
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        # These requests go out over the network through self.urlopen
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
1903
def _setup_opener(self):
    """Create the cookie jar and the URL opener used by urlopen().

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params and sets self._socket_timeout,
    self.cookiejar and self._opener.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_option = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load cookies from a file that is actually readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    if proxy_option is None:
        # No explicit proxy given: use the system proxy configuration
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_option == '':
        # An empty proxy string results in no proxies at all
        proxies = {}
    else:
        proxies = {'http': proxy_option, 'https': proxy_option}
    proxy_handler = PerRequestProxyHandler(proxies)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
1944
def encode(self, s):
    """Return s as bytes, encoding text with self.get_encoding().

    Bytes input is returned unchanged. A UnicodeEncodeError is re-raised
    with a configuration hint appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
1954
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
1960
1961 def _write_thumbnails(self, info_dict, filename):
1962 if self.params.get('writethumbnail', False):
1963 thumbnails = info_dict.get('thumbnails')
1964 if thumbnails:
1965 thumbnails = [thumbnails[-1]]
1966 elif self.params.get('write_all_thumbnails', False):
1967 thumbnails = info_dict.get('thumbnails')
1968 else:
1969 return
1970
1971 if not thumbnails:
1972 # No thumbnails present, so return immediately
1973 return
1974
1975 for t in thumbnails:
1976 thumb_ext = determine_ext(t['url'], 'jpg')
1977 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1978 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1979 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1980
1981 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1982 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1983 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1984 else:
1985 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1986 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1987 try:
1988 uf = self.urlopen(t['url'])
1989 with open(thumb_filename, 'wb') as thumbf:
1990 shutil.copyfileobj(uf, thumbf)
1991 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1992 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1993 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1994 self.report_warning('Unable to download thumbnail "%s": %s' %
1995 (t['url'], compat_str(err)))