#!/usr/bin/env python
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
import tokenize
import traceback
import random

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_cookiejar,
    compat_get_terminal_size,
    compat_http_client,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    ExtractorError,
    format_bytes,
    formatSeconds,
    GeoRestrictedError,
    int_or_none,
    ISO3166Utils,
    locked_file,
    make_HTTPS_handler,
    MaxDownloadsReached,
    orderedSet,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    subtitles_filename,
    UnavailableVideoError,
    url_basename,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieJar,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegSubtitlesConvertorPP,
    get_postprocessor,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how
    to extract all the needed information (a task that InfoExtractors
    handle), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names.
    trim_file_name:    Limit length of filename (extension excluded).
    ignoreerrors:      Do not stop on download errors.
    force_generic_extractor: Force downloader to use the generic extractor
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dlc/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dlc servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be
                       used for explicit geographic restriction bypassing via
                       faking X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: If True, use the native HLS downloader instead of
                       ffmpeg/avconv; if False, use ffmpeg/avconv; if None,
                       use the downloader suggested by the extractor.

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see youtube_dlc/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg when both are
                       available; otherwise prefer ffmpeg.
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A list of additional command-line arguments for the
                        postprocessor.

    The following options are used by the Youtube extractor:
    youtube_include_dash_manifest: If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH.
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        def preload_download_archive(self):
            """Preload the archive, if any is specified."""
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['youtube-dlc']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

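    # Illustrative: given argv like ['-abcdefghij', 'URL'] (a made-up 11-char
    # dash-prefixed ID), the suggested invocation becomes
    # 'youtube-dlc URL -- -abcdefghij' so the ID is not parsed as an option.
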
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractor_classes to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors, this method may throw an exception (after
        printing the message) when an error is found.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)
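
            # E.g. (illustrative): for a playlist with 250 entries,
            # '%(playlist_index)s' in the template is rewritten to
            # '%(playlist_index)03d', so indices render as 001, 002, ..., 250.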

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)
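
            # E.g. (illustrative): if 'view_count' is missing, a template piece
            # like '%(view_count)05d' is rewritten to '%(view_count)s' so that
            # substituting the placeholder string 'NA' cannot raise a TypeError.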

            # expand_path translates '%%' into '%' and '$$' into '$'
            # respectively; that is not what we want, since we need to keep
            # '%%' intact for the template dict substitution step. Work around
            # it with a boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to work around encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict, incomplete):
        """Returns None if the file should be downloaded"""

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None

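    # Illustrative match_filter callable (an assumed shape, consistent with the
    # 'match_filter' option documented above and with _match_entry's use of it):
    #
    #   def skip_short_videos(info_dict):
    #       duration = info_dict.get('duration')
    #       if duration is not None and duration < 60:
    #           return 'Skipping %s: shorter than 60 seconds' % info_dict.get('id')
    #       return None  # None means: download the video
    #
    #   ydl = YoutubeDL({'match_filter': skip_short_videos})
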
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie = self.get_info_extractor(ie.ie_key())
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                self.report_error(msg)
                break
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        })

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error; don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather a URL or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
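                # E.g. (illustrative): a playlist_items value of '1-3,7' expands
                # via iter_playlistitems to 1, 2, 3, 7; orderedSet keeps that
                # order and drops duplicates.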

            ie_entries = ie_result['entries']

            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def _build_format_filter(self, filter_spec):
        """Returns a function to filter the formats according to the filter_spec"""

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

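    # Illustrative filter specs accepted above (the values are made-up examples):
    #   'height<=480'   keep formats no taller than 480 pixels
    #   'tbr>500?'      numeric compare; a trailing '?' also keeps formats
    #                   that have no value for the key (none_inclusive)
    #   'ext=mp4'       string compare; '^=', '$=' and '*=' match prefix,
    #                   suffix and substring, and a leading '!' negates,
    #                   e.g. 'vcodec!*=avc1'
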
    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        def prefer_best():
            if self.params.get('simulate', False):
                return False
            if not download:
                return False
            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
                return True
            if info_dict.get('is_live'):
                return True
            if not can_merge():
                return True
            return False

        req_format_list = ['bestvideo+bestaudio', 'best']
        if prefer_best():
            req_format_list.reverse()
        return '/'.join(req_format_list)

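    # In effect the default resolves to 'bestvideo+bestaudio/best' when merging
    # is possible, and to 'best/bestvideo+bestaudio' otherwise (output to
    # stdout, live streams, or no usable ffmpeg/avconv merger).
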
    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fall back to the best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_pair):
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)

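    # Illustrative format_spec strings the parser above understands (the
    # concrete values are examples, not recommendations):
    #   'best'                              a single selector
    #   'bestvideo+bestaudio/best'          '+' merges, '/' falls back
    #   'bestvideo[height<=720]+bestaudio'  '[...]' attaches a format filter
    #   '(mp4,webm)[height<480]'            ',' selects several, '(...)' groups
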
    def _calc_headers(self, info_dict):
        res = std_headers.copy()

        add_headers = info_dict.get('http_headers')
        if add_headers:
            res.update(add_headers)

        cookies = self._calc_cookies(info_dict)
        if cookies:
            res['Cookie'] = cookies

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res

    def _calc_cookies(self, info_dict):
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')

1459 def process_video_result(self, info_dict, download=True):
1460 assert info_dict.get('_type', 'video') == 'video'
1461
1462 if 'id' not in info_dict:
1463 raise ExtractorError('Missing "id" field in extractor result')
1464 if 'title' not in info_dict:
1465 raise ExtractorError('Missing "title" field in extractor result')
1466
1467 def report_force_conversion(field, field_not, conversion):
1468 self.report_warning(
1469 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1470 % (field, field_not, conversion))
1471
1472 def sanitize_string_field(info, string_field):
1473 field = info.get(string_field)
1474 if field is None or isinstance(field, compat_str):
1475 return
1476 report_force_conversion(string_field, 'a string', 'string')
1477 info[string_field] = compat_str(field)
1478
1479 def sanitize_numeric_fields(info):
1480 for numeric_field in self._NUMERIC_FIELDS:
1481 field = info.get(numeric_field)
1482 if field is None or isinstance(field, compat_numeric_types):
1483 continue
1484 report_force_conversion(numeric_field, 'numeric', 'int')
1485 info[numeric_field] = int_or_none(field)
1486
1487 sanitize_string_field(info_dict, 'id')
1488 sanitize_numeric_fields(info_dict)
1489
1490 if 'playlist' not in info_dict:
1491 # It isn't part of a playlist
1492 info_dict['playlist'] = None
1493 info_dict['playlist_index'] = None
1494
1495 thumbnails = info_dict.get('thumbnails')
1496 if thumbnails is None:
1497 thumbnail = info_dict.get('thumbnail')
1498 if thumbnail:
1499 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1500 if thumbnails:
1501 thumbnails.sort(key=lambda t: (
1502 t.get('preference') if t.get('preference') is not None else -1,
1503 t.get('width') if t.get('width') is not None else -1,
1504 t.get('height') if t.get('height') is not None else -1,
1505 t.get('id') if t.get('id') is not None else '', t.get('url')))
1506 for i, t in enumerate(thumbnails):
1507 t['url'] = sanitize_url(t['url'])
1508 if t.get('width') and t.get('height'):
1509 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1510 if t.get('id') is None:
1511 t['id'] = '%d' % i
1512
1513 if self.params.get('list_thumbnails'):
1514 self.list_thumbnails(info_dict)
1515 return
1516
1517 thumbnail = info_dict.get('thumbnail')
1518 if thumbnail:
1519 info_dict['thumbnail'] = sanitize_url(thumbnail)
1520 elif thumbnails:
1521 info_dict['thumbnail'] = thumbnails[-1]['url']
1522
1523 if 'display_id' not in info_dict and 'id' in info_dict:
1524 info_dict['display_id'] = info_dict['id']
1525
1526 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1527 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1528 # see http://bugs.python.org/issue1646728)
1529 try:
1530 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1531 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1532 except (ValueError, OverflowError, OSError):
1533 pass
1534
1535 # Auto-generate title fields corresponding to the *_number fields when missing
1536 # in order to always have clean titles. This is very common for TV series.
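# e.g. season_number=2 with no 'season' field yields season = 'Season 2'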
1537 for field in ('chapter', 'season', 'episode'):
1538 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1539 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1540
1541 for cc_kind in ('subtitles', 'automatic_captions'):
1542 cc = info_dict.get(cc_kind)
1543 if cc:
1544 for _, subtitle in cc.items():
1545 for subtitle_format in subtitle:
1546 if subtitle_format.get('url'):
1547 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1548 if subtitle_format.get('ext') is None:
1549 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1550
1551 automatic_captions = info_dict.get('automatic_captions')
1552 subtitles = info_dict.get('subtitles')
1553
1554 if self.params.get('listsubtitles', False):
1555 if 'automatic_captions' in info_dict:
1556 self.list_subtitles(
1557 info_dict['id'], automatic_captions, 'automatic captions')
1558 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1559 return
1560
1561 info_dict['requested_subtitles'] = self.process_subtitles(
1562 info_dict['id'], subtitles, automatic_captions)
1563
1564 # We now pick which formats have to be downloaded
1565 if info_dict.get('formats') is None:
1566 # There's only one format available
1567 formats = [info_dict]
1568 else:
1569 formats = info_dict['formats']
1570
1571 if not formats:
1572 raise ExtractorError('No video formats found!')
1573
1574 def is_wellformed(f):
1575 url = f.get('url')
1576 if not url:
1577 self.report_warning(
1578 '"url" field is missing or empty - skipping format, '
1579 'there is an error in extractor')
1580 return False
1581 if isinstance(url, bytes):
1582 sanitize_string_field(f, 'url')
1583 return True
1584
1585 # Filter out malformed formats for better extraction robustness
1586 formats = list(filter(is_wellformed, formats))
1587
1588 formats_dict = {}
1589
1590 # We check that all the formats have the format and format_id fields
1591 for i, format in enumerate(formats):
1592 sanitize_string_field(format, 'format_id')
1593 sanitize_numeric_fields(format)
1594 format['url'] = sanitize_url(format['url'])
1595 if not format.get('format_id'):
1596 format['format_id'] = compat_str(i)
1597 else:
1598 # Sanitize format_id by replacing characters used in format selector expressions
1599 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
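# e.g. format_id 'hls 720p (en)' becomes 'hls_720p__en_'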
1600 format_id = format['format_id']
1601 if format_id not in formats_dict:
1602 formats_dict[format_id] = []
1603 formats_dict[format_id].append(format)
1604
1605 # Make sure all formats have a unique format_id
1606 for format_id, ambiguous_formats in formats_dict.items():
1607 if len(ambiguous_formats) > 1:
1608 for i, format in enumerate(ambiguous_formats):
1609 format['format_id'] = '%s-%d' % (format_id, i)
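# e.g. two formats both named 'hls' are renamed to 'hls-0' and 'hls-1'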
1610
1611 for i, format in enumerate(formats):
1612 if format.get('format') is None:
1613 format['format'] = '{id} - {res}{note}'.format(
1614 id=format['format_id'],
1615 res=self.format_resolution(format),
1616 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1617 )
1618 # Automatically determine file extension if missing
1619 if format.get('ext') is None:
1620 format['ext'] = determine_ext(format['url']).lower()
1621 # Automatically determine protocol if missing (useful for format
1622 # selection purposes)
1623 if format.get('protocol') is None:
1624 format['protocol'] = determine_protocol(format)
1625 # Add HTTP headers, so that external programs can use them from the
1626 # json output
1627 full_format_info = info_dict.copy()
1628 full_format_info.update(format)
1629 format['http_headers'] = self._calc_headers(full_format_info)
1630 # Remove private housekeeping stuff
1631 if '__x_forwarded_for_ip' in info_dict:
1632 del info_dict['__x_forwarded_for_ip']
1633
1634 # TODO Central sorting goes here
1635
1636 if formats[0] is not info_dict:
1637 # Only set the 'formats' field if the original info_dict lists formats;
1638 # otherwise we end up with a circular reference: the first (and only)
1639 # element of the 'formats' field in info_dict would be info_dict itself,
1640 # which can't be exported to JSON.
1641 info_dict['formats'] = formats
1642 if self.params.get('listformats'):
1643 self.list_formats(info_dict)
1644 return
1645
1646 req_format = self.params.get('format')
1647 if req_format is None:
1648 req_format = self._default_format_spec(info_dict, download=download)
1649 if self.params.get('verbose'):
1650 self.to_stdout('[debug] Default format spec: %s' % req_format)
1651
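# req_format is a format spec string, e.g. 'bestvideo+bestaudio/best'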
1652 format_selector = self.build_format_selector(req_format)
1653
1654 # During format selection we may need access to the original format set
1655 # in order to calculate some metrics or do some processing.
1656 # For now we need to be able to guess whether the formats provided by the
1657 # extractor are incomplete (i.e. whether the extractor provides only
1658 # video-only or audio-only formats) so that format selection works
1659 # properly for extractors with such incomplete formats (see
1660 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1661 # Since formats may be filtered during format selection and may no longer
1662 # match the original formats, the results may be incorrect. Thus the
1663 # original formats or pre-calculated metrics should be passed to the
1664 # format selection routines as well.
1665 # We therefore pass a context object containing all the necessary
1666 # additional data instead of just formats.
1667 # This fixes the incorrect format selection issue (see
1668 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1669 incomplete_formats = (
1670 # All formats are video-only or
1671 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1672 # all formats are audio-only
1673 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1674
1675 ctx = {
1676 'formats': formats,
1677 'incomplete_formats': incomplete_formats,
1678 }
1679
1680 formats_to_download = list(format_selector(ctx))
1681 if not formats_to_download:
1682 raise ExtractorError('requested format not available',
1683 expected=True)
1684
1685 if download:
1686 if len(formats_to_download) > 1:
1687 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1688 for format in formats_to_download:
1689 new_info = dict(info_dict)
1690 new_info.update(format)
1691 self.process_info(new_info)
1692 # We update the info dict with the best quality format (backwards compatibility)
1693 info_dict.update(formats_to_download[-1])
1694 return info_dict
1695
1696 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1697 """Select the requested subtitles and their format"""
1698 available_subs = {}
1699 if normal_subtitles and self.params.get('writesubtitles'):
1700 available_subs.update(normal_subtitles)
1701 if automatic_captions and self.params.get('writeautomaticsub'):
1702 for lang, cap_info in automatic_captions.items():
1703 if lang not in available_subs:
1704 available_subs[lang] = cap_info
1705
1706 if (not self.params.get('writesubtitles')
1707 and not self.params.get('writeautomaticsub')
1708 or not available_subs):
1709 return None
1710
1711 if self.params.get('allsubtitles', False):
1712 requested_langs = available_subs.keys()
1713 else:
1714 if self.params.get('subtitleslangs', False):
1715 requested_langs = self.params.get('subtitleslangs')
1716 elif 'en' in available_subs:
1717 requested_langs = ['en']
1718 else:
1719 requested_langs = [list(available_subs.keys())[0]]
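# i.e. fall back to the first available language when 'en' is absent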
1720
1721 formats_query = self.params.get('subtitlesformat', 'best')
1722 formats_preference = formats_query.split('/') if formats_query else []
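# e.g. a 'srt/ass/best' query yields the preference list ['srt', 'ass', 'best']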
1723 subs = {}
1724 for lang in requested_langs:
1725 formats = available_subs.get(lang)
1726 if formats is None:
1727 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1728 continue
1729 for ext in formats_preference:
1730 if ext == 'best':
1731 f = formats[-1]
1732 break
1733 matches = list(filter(lambda f: f['ext'] == ext, formats))
1734 if matches:
1735 f = matches[-1]
1736 break
1737 else:
1738 f = formats[-1]
1739 self.report_warning(
1740 'No subtitle format found matching "%s" for language %s, '
1741 'using %s' % (formats_query, lang, f['ext']))
1742 subs[lang] = f
1743 return subs
1744
1745 def __forced_printings(self, info_dict, filename, incomplete):
1746 def print_mandatory(field):
1747 if (self.params.get('force%s' % field, False)
1748 and (not incomplete or info_dict.get(field) is not None)):
1749 self.to_stdout(info_dict[field])
1750
1751 def print_optional(field):
1752 if (self.params.get('force%s' % field, False)
1753 and info_dict.get(field) is not None):
1754 self.to_stdout(info_dict[field])
1755
1756 print_mandatory('title')
1757 print_mandatory('id')
1758 if self.params.get('forceurl', False) and not incomplete:
1759 if info_dict.get('requested_formats') is not None:
1760 for f in info_dict['requested_formats']:
1761 self.to_stdout(f['url'] + f.get('play_path', ''))
1762 else:
1763 # For RTMP URLs, also include the playpath
1764 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1765 print_optional('thumbnail')
1766 print_optional('description')
1767 if self.params.get('forcefilename', False) and filename is not None:
1768 self.to_stdout(filename)
1769 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1770 self.to_stdout(formatSeconds(info_dict['duration']))
1771 print_mandatory('format')
1772 if self.params.get('forcejson', False):
1773 self.to_stdout(json.dumps(info_dict))
1774
1775 def process_info(self, info_dict):
1776 """Process a single resolved IE result."""
1777
1778 assert info_dict.get('_type', 'video') == 'video'
1779
1780 max_downloads = self.params.get('max_downloads')
1781 if max_downloads is not None:
1782 if self._num_downloads >= int(max_downloads):
1783 raise MaxDownloadsReached()
1784
1785 # TODO: backward compatibility, to be removed
1786 info_dict['fulltitle'] = info_dict['title']
1787
1788 if 'format' not in info_dict:
1789 info_dict['format'] = info_dict['ext']
1790
1791 reason = self._match_entry(info_dict, incomplete=False)
1792 if reason is not None:
1793 self.to_screen('[download] ' + reason)
1794 return
1795
1796 self._num_downloads += 1
1797
1798 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1799
1800 # Forced printings
1801 self.__forced_printings(info_dict, filename, incomplete=False)
1802
1803 # Do nothing else if in simulate mode
1804 if self.params.get('simulate', False):
1805 return
1806
1807 if filename is None:
1808 return
1809
1810 def ensure_dir_exists(path):
1811 try:
1812 dn = os.path.dirname(path)
1813 if dn and not os.path.exists(dn):
1814 os.makedirs(dn)
1815 return True
1816 except (OSError, IOError) as err:
1817 self.report_error('unable to create directory ' + error_to_compat_str(err))
1818 return False
1819
1820 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1821 return
1822
1823 if self.params.get('writedescription', False):
1824 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1825 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1826 self.to_screen('[info] Video description is already present')
1827 elif info_dict.get('description') is None:
1828 self.report_warning('There\'s no description to write.')
1829 else:
1830 try:
1831 self.to_screen('[info] Writing video description to: ' + descfn)
1832 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1833 descfile.write(info_dict['description'])
1834 except (OSError, IOError):
1835 self.report_error('Cannot write description file ' + descfn)
1836 return
1837
1838 if self.params.get('writeannotations', False):
1839 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1840 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1841 self.to_screen('[info] Video annotations are already present')
1842 elif not info_dict.get('annotations'):
1843 self.report_warning('There are no annotations to write.')
1844 else:
1845 try:
1846 self.to_screen('[info] Writing video annotations to: ' + annofn)
1847 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1848 annofile.write(info_dict['annotations'])
1849 except (KeyError, TypeError):
1850 self.report_warning('There are no annotations to write.')
1851 except (OSError, IOError):
1852 self.report_error('Cannot write annotations file: ' + annofn)
1853 return
1854
1855 def dl(name, info):
1856 fd = get_suitable_downloader(info, self.params)(self, self.params)
1857 for ph in self._progress_hooks:
1858 fd.add_progress_hook(ph)
1859 if self.params.get('verbose'):
1860 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1861 return fd.download(name, info)
1862
1863 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1864 self.params.get('writeautomaticsub')])
1865
1866 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1867 # Subtitle download errors are already managed as troubles in the relevant IE;
1868 # that way it will silently go on when used with an IE that lacks subtitle support
1869 subtitles = info_dict['requested_subtitles']
1870 ie = self.get_info_extractor(info_dict['extractor_key'])
1871 for sub_lang, sub_info in subtitles.items():
1872 sub_format = sub_info['ext']
1873 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1874 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1875 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1876 else:
1877 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1878 if sub_info.get('data') is not None:
1879 try:
1880 # Use newline='' to prevent conversion of newline characters
1881 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1882 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1883 subfile.write(sub_info['data'])
1884 except (OSError, IOError):
1885 self.report_error('Cannot write subtitles file ' + sub_filename)
1886 return
1887 else:
1888 try:
1889 if self.params.get('sleep_interval_subtitles', False):
1890 dl(sub_filename, sub_info)
1891 else:
1892 sub_data = ie._request_webpage(
1893 sub_info['url'], info_dict['id'], note=False).read()
1894 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1895 subfile.write(sub_data)
1896 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1897 self.report_warning('Unable to download subtitle for "%s": %s' %
1898 (sub_lang, error_to_compat_str(err)))
1899 continue
1900
1901 if self.params.get('skip_download', False):
1902 if self.params.get('convertsubtitles', False):
1903 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1904 filename_real_ext = os.path.splitext(filename)[1][1:]
1905 filename_wo_ext = (
1906 os.path.splitext(filename)[0]
1907 if filename_real_ext == info_dict['ext']
1908 else filename)
1909 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1910 if subconv.available:
1911 info_dict.setdefault('__postprocessors', [])
1912 # info_dict['__postprocessors'].append(subconv)
1913 if os.path.exists(encodeFilename(afilename)):
1914 self.to_screen(
1915 '[download] %s has already been downloaded and '
1916 'converted' % afilename)
1917 else:
1918 try:
1919 self.post_process(filename, info_dict)
1920 except (PostProcessingError) as err:
1921 self.report_error('postprocessing: %s' % str(err))
1922 return
1923
1924 if self.params.get('writeinfojson', False):
1925 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1926 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1927 self.to_screen('[info] Video description metadata is already present')
1928 else:
1929 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1930 try:
1931 write_json_file(self.filter_requested_info(info_dict), infofn)
1932 except (OSError, IOError):
1933 self.report_error('Cannot write metadata to JSON file ' + infofn)
1934 return
1935
1936 self._write_thumbnails(info_dict, filename)
1937
1938 if not self.params.get('skip_download', False):
1939 try:
1940 if info_dict.get('requested_formats') is not None:
1941 downloaded = []
1942 success = True
1943 merger = FFmpegMergerPP(self)
1944 if not merger.available:
1945 postprocessors = []
1946 self.report_warning('You have requested multiple '
1947 'formats but neither ffmpeg nor avconv is installed.'
1948 ' The formats won\'t be merged.')
1949 else:
1950 postprocessors = [merger]
1951
1952 def compatible_formats(formats):
1953 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
1954 video_formats = [format for format in formats if format.get('vcodec') != 'none']
1955 audio_formats = [format for format in formats if format.get('acodec') != 'none']
1956 if len(video_formats) > 2 or len(audio_formats) > 2:
1957 return False
1958
1959 # Check extension
1960 exts = set(format.get('ext') for format in formats)
1961 COMPATIBLE_EXTS = (
1962 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
1963 set(('webm',)),
1964 )
1965 for ext_sets in COMPATIBLE_EXTS:
1966 if ext_sets.issuperset(exts):
1967 return True
1968 # TODO: Check acodec/vcodec
1969 return False
1970
1971 filename_real_ext = os.path.splitext(filename)[1][1:]
1972 filename_wo_ext = (
1973 os.path.splitext(filename)[0]
1974 if filename_real_ext == info_dict['ext']
1975 else filename)
1976 requested_formats = info_dict['requested_formats']
1977 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1978 info_dict['ext'] = 'mkv'
1979 self.report_warning(
1980 'Requested formats are incompatible for merge and will be merged into mkv.')
1981 # Ensure filename always has a correct extension for successful merge
1982 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1983 if os.path.exists(encodeFilename(filename)):
1984 self.to_screen(
1985 '[download] %s has already been downloaded and '
1986 'merged' % filename)
1987 else:
1988 for f in requested_formats:
1989 new_info = dict(info_dict)
1990 new_info.update(f)
1991 fname = prepend_extension(
1992 self.prepare_filename(new_info),
1993 'f%s' % f['format_id'], new_info['ext'])
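# e.g. 'video.mp4' with format_id '137' becomes 'video.f137.mp4'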
1994 if not ensure_dir_exists(fname):
1995 return
1996 downloaded.append(fname)
1997 partial_success = dl(fname, new_info)
1998 success = success and partial_success
1999 info_dict['__postprocessors'] = postprocessors
2000 info_dict['__files_to_merge'] = downloaded
2001 else:
2002 # Just a single file
2003 success = dl(filename, info_dict)
2004 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2005 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2006 return
2007 except (OSError, IOError) as err:
2008 raise UnavailableVideoError(err)
2009 except (ContentTooShortError, ) as err:
2010 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2011 return
2012
2013 if success and filename != '-':
2014 # Fixup content
2015 fixup_policy = self.params.get('fixup')
2016 if fixup_policy is None:
2017 fixup_policy = 'detect_or_warn'
2018
2019 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2020
2021 stretched_ratio = info_dict.get('stretched_ratio')
2022 if stretched_ratio is not None and stretched_ratio != 1:
2023 if fixup_policy == 'warn':
2024 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2025 info_dict['id'], stretched_ratio))
2026 elif fixup_policy == 'detect_or_warn':
2027 stretched_pp = FFmpegFixupStretchedPP(self)
2028 if stretched_pp.available:
2029 info_dict.setdefault('__postprocessors', [])
2030 info_dict['__postprocessors'].append(stretched_pp)
2031 else:
2032 self.report_warning(
2033 '%s: Non-uniform pixel ratio (%s). %s'
2034 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2035 else:
2036 assert fixup_policy in ('ignore', 'never')
2037
2038 if (info_dict.get('requested_formats') is None
2039 and info_dict.get('container') == 'm4a_dash'):
2040 if fixup_policy == 'warn':
2041 self.report_warning(
2042 '%s: writing DASH m4a. '
2043 'Only some players support this container.'
2044 % info_dict['id'])
2045 elif fixup_policy == 'detect_or_warn':
2046 fixup_pp = FFmpegFixupM4aPP(self)
2047 if fixup_pp.available:
2048 info_dict.setdefault('__postprocessors', [])
2049 info_dict['__postprocessors'].append(fixup_pp)
2050 else:
2051 self.report_warning(
2052 '%s: writing DASH m4a. '
2053 'Only some players support this container. %s'
2054 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2055 else:
2056 assert fixup_policy in ('ignore', 'never')
2057
2058 if (info_dict.get('protocol') == 'm3u8_native'
2059 or info_dict.get('protocol') == 'm3u8'
2060 and self.params.get('hls_prefer_native')):
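# Note: 'and' binds tighter than 'or', so this applies to m3u8_native always
# and to m3u8 only when hls_prefer_native is set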
2061 if fixup_policy == 'warn':
2062 self.report_warning('%s: malformed AAC bitstream detected.' % (
2063 info_dict['id']))
2064 elif fixup_policy == 'detect_or_warn':
2065 fixup_pp = FFmpegFixupM3u8PP(self)
2066 if fixup_pp.available:
2067 info_dict.setdefault('__postprocessors', [])
2068 info_dict['__postprocessors'].append(fixup_pp)
2069 else:
2070 self.report_warning(
2071 '%s: malformed AAC bitstream detected. %s'
2072 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2073 else:
2074 assert fixup_policy in ('ignore', 'never')
2075
2076 try:
2077 self.post_process(filename, info_dict)
2078 except (PostProcessingError) as err:
2079 self.report_error('postprocessing: %s' % str(err))
2080 return
2081 self.record_download_archive(info_dict)
2082
2083 def download(self, url_list):
2084 """Download a given list of URLs."""
2085 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2086 if (len(url_list) > 1
2087 and outtmpl != '-'
2088 and '%' not in outtmpl
2089 and self.params.get('max_downloads') != 1):
2090 raise SameFileError(outtmpl)
2091
2092 for url in url_list:
2093 try:
2094 # extract_info also downloads the videos
2095 res = self.extract_info(
2096 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2097 except UnavailableVideoError:
2098 self.report_error('unable to download video')
2099 except MaxDownloadsReached:
2100 self.to_screen('[info] Maximum number of downloaded files reached.')
2101 raise
2102 else:
2103 if self.params.get('dump_single_json', False):
2104 self.to_stdout(json.dumps(res))
2105
2106 return self._download_retcode
2107
2108 def download_with_info_file(self, info_filename):
2109 with contextlib.closing(fileinput.FileInput(
2110 [info_filename], mode='r',
2111 openhook=fileinput.hook_encoded('utf-8'))) as f:
2112 # FileInput doesn't have a read method, so we can't call json.load
2113 info = self.filter_requested_info(json.loads('\n'.join(f)))
2114 try:
2115 self.process_ie_result(info, download=True)
2116 except DownloadError:
2117 webpage_url = info.get('webpage_url')
2118 if webpage_url is not None:
2119 self.report_warning('The info failed to download; trying with "%s"' % webpage_url)
2120 return self.download([webpage_url])
2121 else:
2122 raise
2123 return self._download_retcode
2124
2125 @staticmethod
2126 def filter_requested_info(info_dict):
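"""Return a copy of info_dict without the requested_formats and requested_subtitles fields."""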
2127 return dict(
2128 (k, v) for k, v in info_dict.items()
2129 if k not in ['requested_formats', 'requested_subtitles'])
2130
2131 def post_process(self, filename, ie_info):
2132 """Run all the postprocessors on the given file."""
2133 info = dict(ie_info)
2134 info['filepath'] = filename
2135 pps_chain = []
2136 if ie_info.get('__postprocessors') is not None:
2137 pps_chain.extend(ie_info['__postprocessors'])
2138 pps_chain.extend(self._pps)
2139 for pp in pps_chain:
2140 files_to_delete = []
2141 try:
2142 files_to_delete, info = pp.run(info)
2143 except PostProcessingError as e:
2144 self.report_error(e.msg)
2145 if files_to_delete and not self.params.get('keepvideo', False):
2146 for old_filename in set(files_to_delete):
2147 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2148 try:
2149 os.remove(encodeFilename(old_filename))
2150 except (IOError, OSError):
2151 self.report_warning('Unable to remove downloaded original file')
2152
2153 def _make_archive_id(self, info_dict):
2154 video_id = info_dict.get('id')
2155 if not video_id:
2156 return
2157 # Future-proof against any change in the case of the extractor key
2158 # and keep backwards compatibility with prior versions
2159 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2160 if extractor is None:
2161 url = str_or_none(info_dict.get('url'))
2162 if not url:
2163 return
2164 # Try to find matching extractor for the URL and take its ie_key
2165 for ie in self._ies:
2166 if ie.suitable(url):
2167 extractor = ie.ie_key()
2168 break
2169 else:
2170 return
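# Archive IDs have the form '<extractor> <id>', e.g. 'youtube dQw4w9WgXcQ'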
2171 return extractor.lower() + ' ' + video_id
2172
2173 def in_download_archive(self, info_dict):
2174 fn = self.params.get('download_archive')
2175 if fn is None:
2176 return False
2177
2178 vid_id = self._make_archive_id(info_dict)
2179 if not vid_id:
2180 return False # Incomplete video information
2181
2182 return vid_id in self.archive
2183
2184 def record_download_archive(self, info_dict):
2185 fn = self.params.get('download_archive')
2186 if fn is None:
2187 return
2188 vid_id = self._make_archive_id(info_dict)
2189 assert vid_id
2190 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2191 archive_file.write(vid_id + '\n')
2192 self.archive.add(vid_id)
2193
2194 @staticmethod
2195 def format_resolution(format, default='unknown'):
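"""Return a human-readable resolution, e.g. '1280x720', '720p', 'audio only' or the default."""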
2196 if format.get('vcodec') == 'none':
2197 return 'audio only'
2198 if format.get('resolution') is not None:
2199 return format['resolution']
2200 if format.get('height') is not None:
2201 if format.get('width') is not None:
2202 res = '%sx%s' % (format['width'], format['height'])
2203 else:
2204 res = '%sp' % format['height']
2205 elif format.get('width') is not None:
2206 res = '%dx?' % format['width']
2207 else:
2208 res = default
2209 return res
2210
2211 def _format_note(self, fdict):
2212 res = ''
2213 if fdict.get('ext') in ['f4f', 'f4m']:
2214 res += '(unsupported) '
2215 if fdict.get('language'):
2216 if res:
2217 res += ' '
2218 res += '[%s] ' % fdict['language']
2219 if fdict.get('format_note') is not None:
2220 res += fdict['format_note'] + ' '
2221 if fdict.get('tbr') is not None:
2222 res += '%4dk ' % fdict['tbr']
2223 if fdict.get('container') is not None:
2224 if res:
2225 res += ', '
2226 res += '%s container' % fdict['container']
2227 if (fdict.get('vcodec') is not None
2228 and fdict.get('vcodec') != 'none'):
2229 if res:
2230 res += ', '
2231 res += fdict['vcodec']
2232 if fdict.get('vbr') is not None:
2233 res += '@'
2234 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2235 res += 'video@'
2236 if fdict.get('vbr') is not None:
2237 res += '%4dk' % fdict['vbr']
2238 if fdict.get('fps') is not None:
2239 if res:
2240 res += ', '
2241 res += '%sfps' % fdict['fps']
2242 if fdict.get('acodec') is not None:
2243 if res:
2244 res += ', '
2245 if fdict['acodec'] == 'none':
2246 res += 'video only'
2247 else:
2248 res += '%-5s' % fdict['acodec']
2249 elif fdict.get('abr') is not None:
2250 if res:
2251 res += ', '
2252 res += 'audio'
2253 if fdict.get('abr') is not None:
2254 res += '@%3dk' % fdict['abr']
2255 if fdict.get('asr') is not None:
2256 res += ' (%5dHz)' % fdict['asr']
2257 if fdict.get('filesize') is not None:
2258 if res:
2259 res += ', '
2260 res += format_bytes(fdict['filesize'])
2261 elif fdict.get('filesize_approx') is not None:
2262 if res:
2263 res += ', '
2264 res += '~' + format_bytes(fdict['filesize_approx'])
2265 return res
2266
2267 def list_formats(self, info_dict):
2268 formats = info_dict.get('formats', [info_dict])
2269 table = [
2270 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2271 for f in formats
2272 if f.get('preference') is None or f['preference'] >= -1000]
2273 if len(formats) > 1:
2274 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2275
2276 header_line = ['format code', 'extension', 'resolution', 'note']
2277 self.to_screen(
2278 '[info] Available formats for %s:\n%s' %
2279 (info_dict['id'], render_table(header_line, table)))
2280
2281 def list_thumbnails(self, info_dict):
2282 thumbnails = info_dict.get('thumbnails')
2283 if not thumbnails:
2284 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2285 return
2286
2287 self.to_screen(
2288 '[info] Thumbnails for %s:' % info_dict['id'])
2289 self.to_screen(render_table(
2290 ['ID', 'width', 'height', 'URL'],
2291 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2292
2293 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2294 if not subtitles:
2295 self.to_screen('%s has no %s' % (video_id, name))
2296 return
2297 self.to_screen(
2298 'Available %s for %s:' % (name, video_id))
2299 self.to_screen(render_table(
2300 ['Language', 'formats'],
2301 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2302 for lang, formats in subtitles.items()]))
2303
2304 def urlopen(self, req):
2305 """ Start an HTTP download """
2306 if isinstance(req, compat_basestring):
2307 req = sanitized_Request(req)
2308 return self._opener.open(req, timeout=self._socket_timeout)
2309
2310 def print_debug_header(self):
2311 if not self.params.get('verbose'):
2312 return
2313
2314 if type('') is not compat_str:
2315 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2316 self.report_warning(
2317 'Your Python is broken! Update to a newer and supported version')
2318
2319 stdout_encoding = getattr(
2320 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2321 encoding_str = (
2322 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2323 locale.getpreferredencoding(),
2324 sys.getfilesystemencoding(),
2325 stdout_encoding,
2326 self.get_encoding()))
2327 write_string(encoding_str, encoding=None)
2328
2329 self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2330 if _LAZY_LOADER:
2331 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2332 try:
2333 sp = subprocess.Popen(
2334 ['git', 'rev-parse', '--short', 'HEAD'],
2335 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2336 cwd=os.path.dirname(os.path.abspath(__file__)))
2337 out, err = sp.communicate()
2338 out = out.decode().strip()
2339 if re.match('[0-9a-f]+', out):
2340 self._write_string('[debug] Git HEAD: ' + out + '\n')
2341 except Exception:
2342 try:
2343 sys.exc_clear()
2344 except Exception:
2345 pass
2346
2347 def python_implementation():
2348 impl_name = platform.python_implementation()
2349 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2350 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2351 return impl_name
2352
2353 self._write_string('[debug] Python version %s (%s) - %s\n' % (
2354 platform.python_version(), python_implementation(),
2355 platform_name()))
2356
2357 exe_versions = FFmpegPostProcessor.get_versions(self)
2358 exe_versions['rtmpdump'] = rtmpdump_version()
2359 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2360 exe_str = ', '.join(
2361 '%s %s' % (exe, v)
2362 for exe, v in sorted(exe_versions.items())
2363 if v
2364 )
2365 if not exe_str:
2366 exe_str = 'none'
2367 self._write_string('[debug] exe versions: %s\n' % exe_str)
2368
2369 proxy_map = {}
2370 for handler in self._opener.handlers:
2371 if hasattr(handler, 'proxies'):
2372 proxy_map.update(handler.proxies)
2373 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2374
2375 if self.params.get('call_home', False):
2376 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2377 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2378 latest_version = self.urlopen(
2379 'https://yt-dl.org/latest/version').read().decode('utf-8')
2380 if version_tuple(latest_version) > version_tuple(__version__):
2381 self.report_warning(
2382 'You are using an outdated version (newest version: %s)! '
2383 'See https://yt-dl.org/update if you need help updating.' %
2384 latest_version)
2385
2386 def _setup_opener(self):
2387 timeout_val = self.params.get('socket_timeout')
2388 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
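# i.e. the socket timeout defaults to 600 seconds (10 minutes)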
2389
2390 opts_cookiefile = self.params.get('cookiefile')
2391 opts_proxy = self.params.get('proxy')
2392
2393 if opts_cookiefile is None:
2394 self.cookiejar = compat_cookiejar.CookieJar()
2395 else:
2396 opts_cookiefile = expand_path(opts_cookiefile)
2397 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2398 if os.access(opts_cookiefile, os.R_OK):
2399 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2400
2401 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2402 if opts_proxy is not None:
2403 if opts_proxy == '':
2404 proxies = {}
2405 else:
2406 proxies = {'http': opts_proxy, 'https': opts_proxy}
2407 else:
2408 proxies = compat_urllib_request.getproxies()
2409 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2410 if 'http' in proxies and 'https' not in proxies:
2411 proxies['https'] = proxies['http']
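# e.g. an http_proxy environment setting is then reused for https:// requests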
2412 proxy_handler = PerRequestProxyHandler(proxies)
2413
2414 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2415 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2416 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2417 redirect_handler = YoutubeDLRedirectHandler()
2418 data_handler = compat_urllib_request_DataHandler()
2419
2420 # When passing our own FileHandler instance, build_opener won't add the
2421 # default FileHandler; this allows us to disable the file protocol, which
2422 # can be used for malicious purposes (see
2423 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2424 file_handler = compat_urllib_request.FileHandler()
2425
2426 def file_open(*args, **kwargs):
2427 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2428 file_handler.file_open = file_open
2429
2430 opener = compat_urllib_request.build_opener(
2431 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2432
2433 # Delete the default user-agent header, which would otherwise apply in
2434 # cases where our custom HTTP handler doesn't come into play
2435 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2436 opener.addheaders = []
2437 self._opener = opener
2438
2439 def encode(self, s):
2440 if isinstance(s, bytes):
2441 return s # Already encoded
2442
2443 try:
2444 return s.encode(self.get_encoding())
2445 except UnicodeEncodeError as err:
2446 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2447 raise
2448
2449 def get_encoding(self):
2450 encoding = self.params.get('encoding')
2451 if encoding is None:
2452 encoding = preferredencoding()
2453 return encoding
2454
2455 def _write_thumbnails(self, info_dict, filename):
2456 if self.params.get('writethumbnail', False):
2457 thumbnails = info_dict.get('thumbnails')
2458 if thumbnails:
2459 thumbnails = [thumbnails[-1]]
2460 elif self.params.get('write_all_thumbnails', False):
2461 thumbnails = info_dict.get('thumbnails')
2462 else:
2463 return
2464
2465 if not thumbnails:
2466 # No thumbnails present, so return immediately
2467 return
2468
2469 for t in thumbnails:
2470 thumb_ext = determine_ext(t['url'], 'jpg')
2471 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2472 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
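# thumb_display_id carries a trailing space, hence '%sis' / '%sto' in the messages below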
2473 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2474
2475 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2476 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2477 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2478 else:
2479 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2480 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2481 try:
2482 uf = self.urlopen(t['url'])
2483 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2484 shutil.copyfileobj(uf, thumbf)
2485 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2486 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2487 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2488 self.report_warning('Unable to download thumbnail "%s": %s' %
2489 (t['url'], error_to_compat_str(err)))