]> jfr.im git - yt-dlp.git/blob - youtube_dlc/YoutubeDL.py
2ecb137fc9b6043356f74a0431a348bdd4afc1ba
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 error_to_compat_str,
61 expand_path,
62 ExtractorError,
63 format_bytes,
64 format_field,
65 formatSeconds,
66 GeoRestrictedError,
67 int_or_none,
68 iri_to_uri,
69 ISO3166Utils,
70 locked_file,
71 make_HTTPS_handler,
72 MaxDownloadsReached,
73 orderedSet,
74 PagedList,
75 parse_filesize,
76 PerRequestProxyHandler,
77 platform_name,
78 PostProcessingError,
79 preferredencoding,
80 prepend_extension,
81 register_socks_protocols,
82 render_table,
83 replace_extension,
84 SameFileError,
85 sanitize_filename,
86 sanitize_path,
87 sanitize_url,
88 sanitized_Request,
89 std_headers,
90 str_or_none,
91 subtitles_filename,
92 to_high_limit_path,
93 UnavailableVideoError,
94 url_basename,
95 version_tuple,
96 write_json_file,
97 write_string,
98 YoutubeDLCookieJar,
99 YoutubeDLCookieProcessor,
100 YoutubeDLHandler,
101 YoutubeDLRedirectHandler,
102 )
103 from .cache import Cache
104 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
105 from .extractor.openload import PhantomJSwrapper
106 from .downloader import get_suitable_downloader
107 from .downloader.rtmp import rtmpdump_version
108 from .postprocessor import (
109 FFmpegFixupM3u8PP,
110 FFmpegFixupM4aPP,
111 FFmpegFixupStretchedPP,
112 FFmpegMergerPP,
113 FFmpegPostProcessor,
114 FFmpegSubtitlesConvertorPP,
115 get_postprocessor,
116 )
117 from .version import __version__
118
119 if compat_os_name == 'nt':
120 import ctypes
121
122
123 class YoutubeDL(object):
124 """YoutubeDL class.
125
126 YoutubeDL objects are the ones responsible of downloading the
127 actual video file and writing it to disk if the user has requested
128 it, among some other tasks. In most cases there should be one per
129 program. As, given a video URL, the downloader doesn't know how to
extract all the needed information, a task that InfoExtractors do, it
131 has to pass the URL to one of them.
132
133 For this, YoutubeDL objects have a method that allows
134 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it to the first InfoExtractor it
136 finds that reports being able to handle it. The InfoExtractor extracts
137 all the information about the video or videos the URL refers to, and
138 YoutubeDL process the extracted information, possibly using a File
139 Downloader to download the video.
140
141 YoutubeDL objects accept a lot of parameters. In order not to saturate
142 the object constructor with arguments, it receives a dictionary of
143 options instead. These options are available through the params
144 attribute for the InfoExtractors to use. The YoutubeDL also
145 registers itself as the downloader in charge for the InfoExtractors
146 that are added to it, so this is a "mutual registration".
147
148 Available options:
149
150 username: Username for authentication purposes.
151 password: Password for authentication purposes.
152 videopassword: Password for accessing a video.
153 ap_mso: Adobe Pass multiple-system operator identifier.
154 ap_username: Multiple-system operator account username.
155 ap_password: Multiple-system operator account password.
156 usenetrc: Use netrc for authentication instead.
157 verbose: Print additional info to stdout.
158 quiet: Do not print messages to stdout.
159 no_warnings: Do not print out anything for warnings.
160 forceurl: Force printing final URL.
161 forcetitle: Force printing title.
162 forceid: Force printing ID.
163 forcethumbnail: Force printing thumbnail URL.
164 forcedescription: Force printing description.
165 forcefilename: Force printing final filename.
166 forceduration: Force printing duration.
167 forcejson: Force printing info_dict as JSON.
168 dump_single_json: Force printing the info_dict of the whole playlist
169 (or video) as a single JSON line.
170 force_write_download_archive: Force writing download archive regardless of
171 'skip_download' or 'simulate'.
172 simulate: Do not download the video files.
173 format: Video format code. see "FORMAT SELECTION" for more details.
174 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
175 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
176 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
177 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
178 outtmpl: Template for output names.
179 restrictfilenames: Do not allow "&" and spaces in file names.
180 trim_file_name: Limit length of filename (extension excluded).
181 ignoreerrors: Do not stop on download errors. (Default False when running youtube-dlc, but True when directly accessing YoutubeDL class)
182 force_generic_extractor: Force downloader to use the generic extractor
183 nooverwrites: Prevent overwriting files.
184 playliststart: Playlist item to start at.
185 playlistend: Playlist item to end at.
186 playlist_items: Specific indices of playlist to download.
187 playlistreverse: Download playlist items in reverse order.
188 playlistrandom: Download playlist items in random order.
189 matchtitle: Download only matching titles.
190 rejecttitle: Reject downloads for matching titles.
191 logger: Log messages to a logging.Logger instance.
192 logtostderr: Log messages to stderr instead of stdout.
193 writedescription: Write the video description to a .description file
194 writeinfojson: Write the video description to a .info.json file
195 writeannotations: Write the video annotations to a .annotations.xml file
196 writethumbnail: Write the thumbnail image to a file
197 write_all_thumbnails: Write all thumbnail formats to files
198 writelink: Write an internet shortcut file, depending on the
199 current platform (.url/.webloc/.desktop)
200 writeurllink: Write a Windows internet shortcut file (.url)
201 writewebloclink: Write a macOS internet shortcut file (.webloc)
202 writedesktoplink: Write a Linux internet shortcut file (.desktop)
203 writesubtitles: Write the video subtitles to a file
204 writeautomaticsub: Write the automatically generated subtitles to a file
205 allsubtitles: Downloads all the subtitles of the video
206 (requires writesubtitles or writeautomaticsub)
207 listsubtitles: Lists all available subtitles for the video
208 subtitlesformat: The format code for subtitles
209 subtitleslangs: List of languages of the subtitles to download
210 keepvideo: Keep the video file after post-processing
211 daterange: A DateRange object, download only if the upload_date is in the range.
212 skip_download: Skip the actual download of the video file
213 cachedir: Location of the cache files in the filesystem.
214 False to disable filesystem cache.
215 noplaylist: Download single video instead of a playlist if in doubt.
216 age_limit: An integer representing the user's age in years.
217 Unsuitable videos for the given age are skipped.
218 min_views: An integer representing the minimum view count the video
219 must have in order to not be skipped.
220 Videos without view count information are always
221 downloaded. None for no limit.
222 max_views: An integer representing the maximum view count.
223 Videos that are more popular than that are not
224 downloaded.
225 Videos without view count information are always
226 downloaded. None for no limit.
227 download_archive: File name of a file where all downloads are recorded.
228 Videos already present in the file are not downloaded
229 again.
230 break_on_existing: Stop the download process after attempting to download a file that's
231 in the archive.
232 cookiefile: File name where cookies should be read from and dumped to.
233 nocheckcertificate:Do not verify SSL certificates
234 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
235 At the moment, this is only supported by YouTube.
236 proxy: URL of the proxy server to use
237 geo_verification_proxy: URL of the proxy to use for IP address verification
238 on geo-restricted sites.
239 socket_timeout: Time to wait for unresponsive hosts, in seconds
240 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
242 debug_printtraffic:Print out sent and received HTTP traffic
243 include_ads: Download ads as well
244 default_search: Prepend this string if an input url is not valid.
245 'auto' for elaborate guessing
246 encoding: Use this encoding instead of the system-specified.
247 extract_flat: Do not resolve URLs, return the immediate result.
248 Pass in 'in_playlist' to only show this behavior for
249 playlist items.
250 postprocessors: A list of dictionaries, each with an entry
251 * key: The name of the postprocessor. See
252 youtube_dlc/postprocessor/__init__.py for a list.
253 as well as any further keyword arguments for the
254 postprocessor.
255 post_hooks: A list of functions that get called as the final step
256 for each video file, after all postprocessors have been
257 called. The filename will be passed as the only argument.
258 progress_hooks: A list of functions that get called on download
259 progress, with a dictionary with the entries
260 * status: One of "downloading", "error", or "finished".
261 Check this first and ignore unknown values.
262
263 If status is one of "downloading", or "finished", the
264 following properties may also be present:
265 * filename: The final filename (always present)
266 * tmpfilename: The filename we're currently writing to
267 * downloaded_bytes: Bytes on disk
268 * total_bytes: Size of the whole file, None if unknown
269 * total_bytes_estimate: Guess of the eventual file size,
270 None if unavailable.
271 * elapsed: The number of seconds since download started.
272 * eta: The estimated time in seconds, None if unknown
273 * speed: The download speed in bytes/second, None if
274 unknown
275 * fragment_index: The counter of the currently
276 downloaded video fragment.
277 * fragment_count: The number of fragments (= individual
278 files that will be merged)
279
280 Progress hooks are guaranteed to be called at least once
281 (with status "finished") if the download is successful.
282 merge_output_format: Extension to use when merging formats.
283 fixup: Automatically correct known faults of the file.
284 One of:
285 - "never": do nothing
286 - "warn": only emit a warning
287 - "detect_or_warn": check whether we can do anything
288 about it, warn otherwise (default)
289 source_address: Client-side IP address to bind to.
290 call_home: Boolean, true iff we are allowed to contact the
291 youtube-dlc servers for debugging.
292 sleep_interval: Number of seconds to sleep before each download when
293 used alone or a lower bound of a range for randomized
294 sleep before each download (minimum possible number
295 of seconds to sleep) when used along with
296 max_sleep_interval.
297 max_sleep_interval:Upper bound of a range for randomized sleep before each
298 download (maximum possible number of seconds to sleep).
299 Must only be used along with sleep_interval.
300 Actual sleep time will be a random float from range
301 [sleep_interval; max_sleep_interval].
302 listformats: Print an overview of available video formats and exit.
303 list_thumbnails: Print a table of all thumbnails and exit.
304 match_filter: A function that gets called with the info_dict of
305 every video.
306 If it returns a message, the video is ignored.
307 If it returns None, the video is downloaded.
308 match_filter_func in utils.py is one example for this.
309 no_color: Do not emit color codes in output.
310 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
311 HTTP header
312 geo_bypass_country:
313 Two-letter ISO 3166-2 country code that will be used for
314 explicit geographic restriction bypassing via faking
315 X-Forwarded-For HTTP header
316 geo_bypass_ip_block:
317 IP range in CIDR notation that will be used similarly to
318 geo_bypass_country
319
320 The following options determine which downloader is picked:
321 external_downloader: Executable of the external downloader to call.
322 None or unset for standard (built-in) downloader.
323 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
324 if True, otherwise use ffmpeg/avconv if False, otherwise
325 use downloader suggested by extractor if None.
326
327 The following parameters are not used by YoutubeDL itself, they are used by
328 the downloader (see youtube_dlc/downloader/common.py):
329 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
330 noresizebuffer, retries, continuedl, noprogress, consoletitle,
331 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
332 http_chunk_size.
333
334 The following options are used by the post processors:
335 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
336 otherwise prefer ffmpeg.
337 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
338 to the binary or its containing directory.
339 postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
340 of additional command-line arguments for the postprocessor.
341 Use 'default' as the name for arguments to passed to all PP.
342
343 The following options are used by the Youtube extractor:
344 youtube_include_dash_manifest: If True (default), DASH manifests and related
345 data will be downloaded and processed by extractor.
346 You can reduce network I/O by disabling it if you don't
347 care about DASH.
348 """
349
    # info_dict fields that carry numeric values; prepare_filename() leaves
    # these unsanitized so integer/float printf-style conversions in the
    # output template (e.g. %(height)d) keep working.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; the real values are set per-instance in __init__().
    params = None  # options dictionary (see the class docstring)
    _ies = []  # registered InfoExtractors, in matching order
    _pps = []  # registered PostProcessors, in chain order
    _download_retcode = None  # process exit status (0 ok, 1 after an error)
    _num_downloads = None  # count of files downloaded in this session
    _screen_file = None  # stream for normal output (stdout, or stderr with logtostderr)
367
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    options dictionary (see the class docstring for keys).
        auto_init: when True, print the debug header and register all
                   default InfoExtractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route normal output to stderr when 'logtostderr' is set
        # (False indexes stdout, True indexes stderr).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        """Preload the archive, if any is specified"""
        def preload_download_archive(self):
            # Load every previously recorded ID from the download archive
            # into self.archive; returns True on success, False when no
            # archive is configured or the file does not exist yet.
            # NOTE(review): nested function with an explicit `self`
            # parameter, invoked below as preload_download_archive(self).
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; re-raise anything else.
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            # Warn once when a deprecated option is set; True if it was set.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            # NOTE(review): this is printed even when no --download-archive
            # was given (the value is then logged as None).
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            # Honour the old option only when the new one is unset.
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Spawn a bidi filter process (bidiv, falling back to fribidi)
            # and pipe our output through it via a pty.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv unavailable; try fribidi instead.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; each dict
        # entry's 'key' selects the class, remaining keys become kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
486
487 def warn_if_short_id(self, argv):
488 # short YouTube ID starting with dash?
489 idxs = [
490 i for i, a in enumerate(argv)
491 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
492 if idxs:
493 correct_argv = (
494 ['youtube-dlc']
495 + [a for i, a in enumerate(argv) if i not in idxs]
496 + ['--'] + [argv[i] for i in idxs]
497 )
498 self.report_warning(
499 'Long argument string detected. '
500 'Use -- to separate parameters and URLs, like this:\n%s\n' %
501 args_to_str(correct_argv))
502
503 def add_info_extractor(self, ie):
504 """Add an InfoExtractor object to the end of the list."""
505 self._ies.append(ie)
506 if not isinstance(ie, type):
507 self._ies_instances[ie.ie_key()] = ie
508 ie.set_downloader(self)
509
510 def get_info_extractor(self, ie_key):
511 """
512 Get an instance of an IE with name ie_key, it will try to get one from
513 the _ies list, if there's no instance it will create a new one and add
514 it to the extractor list.
515 """
516 ie = self._ies_instances.get(ie_key)
517 if ie is None:
518 ie = get_info_extractor(ie_key)()
519 self.add_info_extractor(ie)
520 return ie
521
522 def add_default_info_extractors(self):
523 """
524 Add the InfoExtractors returned by gen_extractors to the end of the list
525 """
526 for ie in gen_extractor_classes():
527 self.add_info_extractor(ie)
528
529 def add_post_processor(self, pp):
530 """Add a PostProcessor object to the end of the chain."""
531 self._pps.append(pp)
532 pp.set_downloader(self)
533
534 def add_post_hook(self, ph):
535 """Add the post hook"""
536 self._post_hooks.append(ph)
537
538 def add_progress_hook(self, ph):
539 """Add the progress hook (currently only for the file downloader)"""
540 self._progress_hooks.append(ph)
541
542 def _bidi_workaround(self, message):
543 if not hasattr(self, '_output_channel'):
544 return message
545
546 assert hasattr(self, '_output_process')
547 assert isinstance(message, compat_str)
548 line_count = message.count('\n') + 1
549 self._output_process.stdin.write((message + '\n').encode('utf-8'))
550 self._output_process.stdin.flush()
551 res = ''.join(self._output_channel.readline().decode('utf-8')
552 for _ in range(line_count))
553 return res[:-len('\n')]
554
555 def to_screen(self, message, skip_eol=False):
556 """Print message to stdout if not in quiet mode."""
557 return self.to_stdout(message, skip_eol, check_quiet=True)
558
559 def _write_string(self, s, out=None):
560 write_string(s, out=out, encoding=self.params.get('encoding'))
561
562 def to_stdout(self, message, skip_eol=False, check_quiet=False):
563 """Print message to stdout if not in quiet mode."""
564 if self.params.get('logger'):
565 self.params['logger'].debug(message)
566 elif not check_quiet or not self.params.get('quiet', False):
567 message = self._bidi_workaround(message)
568 terminator = ['\n', ''][skip_eol]
569 output = message + terminator
570
571 self._write_string(output, self._screen_file)
572
573 def to_stderr(self, message):
574 """Print message to stderr."""
575 assert isinstance(message, compat_str)
576 if self.params.get('logger'):
577 self.params['logger'].error(message)
578 else:
579 message = self._bidi_workaround(message)
580 output = message + '\n'
581 self._write_string(output, self._err_file)
582
    def to_console_title(self, message):
        """Set the terminal/console window title to *message*.

        No-op unless the 'consoletitle' option is enabled; uses the Win32
        console API on Windows and an xterm escape sequence elsewhere.
        """
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # OSC 0 sets both the icon name and the window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
593
594 def save_console_title(self):
595 if not self.params.get('consoletitle', False):
596 return
597 if self.params.get('simulate', False):
598 return
599 if compat_os_name != 'nt' and 'TERM' in os.environ:
600 # Save the title on stack
601 self._write_string('\033[22;0t', self._screen_file)
602
603 def restore_console_title(self):
604 if not self.params.get('consoletitle', False):
605 return
606 if self.params.get('simulate', False):
607 return
608 if compat_os_name != 'nt' and 'TERM' in os.environ:
609 # Restore the title from stack
610 self._write_string('\033[23;0t', self._screen_file)
611
612 def __enter__(self):
613 self.save_console_title()
614 return self
615
616 def __exit__(self, *args):
617 self.restore_console_title()
618
619 if self.params.get('cookiefile') is not None:
620 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
621
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Extractor errors wrap the original exc_info; include
                    # both the wrapped and the current traceback.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped extractor exception's exc_info when present
            # so DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
651
652 def report_warning(self, message):
653 '''
654 Print the message to stderr, it will be prefixed with 'WARNING:'
655 If stderr is a tty file the 'WARNING:' will be colored
656 '''
657 if self.params.get('logger') is not None:
658 self.params['logger'].warning(message)
659 else:
660 if self.params.get('no_warnings'):
661 return
662 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
663 _msg_header = '\033[0;33mWARNING:\033[0m'
664 else:
665 _msg_header = 'WARNING:'
666 warning_message = '%s %s' % (_msg_header, message)
667 self.to_stderr(warning_message)
668
669 def report_error(self, message, tb=None):
670 '''
671 Do the same as trouble, but prefixes the message with 'ERROR:', colored
672 in red if stderr is a tty file.
673 '''
674 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
675 _msg_header = '\033[0;31mERROR:\033[0m'
676 else:
677 _msg_header = 'ERROR:'
678 error_message = '%s %s' % (_msg_header, message)
679 self.trouble(error_message, tb)
680
681 def report_file_already_downloaded(self, file_name):
682 """Report file has already been fully downloaded."""
683 try:
684 self.to_screen('[download] %s has already been downloaded' % file_name)
685 except UnicodeEncodeError:
686 self.to_screen('[download] The file has already been downloaded')
687
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the user's output template ('outtmpl' option) with the
        fields of info_dict (sanitized for filesystem use), applies
        optional trimming, and returns the sanitized path — or None when
        the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Derive a 'resolution' string from width/height when absent.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every non-numeric scalar for filesystem use; drop
            # None values and containers; absent keys render as 'NA'.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            # Truncate the base name to trim_file_name characters while
            # keeping the extension (and a sub-extension, if any).
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
786
    def _match_entry(self, info_dict, incomplete):
        """Return None if the file should be downloaded, otherwise a string
        describing why the entry is skipped (title filters, date range,
        view-count limits, age restriction, archive, match_filter).

        incomplete: True while only partial (e.g. flat-playlist) metadata
        is available; the user match_filter is skipped in that case.
        """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            # Skip uploads outside the configured date range.
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # The user match_filter only runs on fully-extracted entries.
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None
828
829 @staticmethod
830 def add_extra_info(info_dict, extra_info):
831 '''Set the keys from extra_info in info dict if they are missing'''
832 for key, value in extra_info.items():
833 info_dict.setdefault(key, value)
834
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # Restrict the candidate extractors to the requested one, if any
    if ie_key:
        candidates = [self.get_info_extractor(ie_key)]
    else:
        candidates = self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        ie_key = candidate.ie_key()
        extractor = self.get_info_extractor(ie_key)
        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Cheap pre-extraction archive check based on the video id alone,
        # to avoid a full extraction for already-downloaded videos
        try:
            extract_id = getattr(extractor, 'extract_id', None)
            temp_id = extract_id(url) if callable(extract_id) else extractor._match_id(url)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            return None

        return self.__extract_info(url, extractor, download, extra_info, process, info_dict)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
874
def __handle_extraction_exceptions(func):
    """Decorator for extraction methods: report the errors we expect from
    extraction instead of letting them propagate. MaxDownloadsReached is
    always re-raised so it can stop the whole run; any other unexpected
    exception is only swallowed (and reported) when ignoreerrors is set."""
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                available_in = ', '.join(map(ISO3166Utils.short2full, e.countries))
                msg += '\nThis video is available in %s.' % available_in
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
896
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """Run the given InfoExtractor on url and (optionally) process the result."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {
            '_type': 'compat_list',
            'entries': ie_result,
        }
    # Carry over id/title from the outer result, when known
    if info_dict:
        for field in ('id', 'title'):
            if info_dict.get(field):
                ie_result[field] = info_dict[field]
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
918
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in generic metadata (extractor name/key, webpage URL and its
    basename, human-readable duration) without overwriting values the
    extractor already provided."""
    duration = ie_result.get('duration', None)
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'duration_string': formatSeconds(duration, '-') if duration is not None else None,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
930
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # Dispatch on the result type: 'video' (final), 'url'/'url_transparent'
    # (needs another extraction pass), 'playlist'/'multi_video' (recurse
    # per entry) or legacy 'compat_list'.
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        # With --flat-playlist (or extract_flat=True) don't resolve the
        # URL further; just print what was requested and return as-is.
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Non-None fields of the outer (embedding) result override the
        # inner extraction, except identity/bookkeeping fields.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # --playlist-start is 1-based on the command line; 0-based here
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        # --playlist-items, e.g. '1-3,7,10-13' -> ordered set of 1-based indices
        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Select the requested 1-based indices; negative indices count
            # from the end; out-of-range indices are silently dropped.
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Materialize the selected slice of entries. Three cases: a plain
        # list, a lazily-paged PagedList, or a generic iterable/generator.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Only consume the iterable up to the largest requested index
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist context forwarded into each entry via add_extra_info
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                # NOTE(review): this suffix must stay in sync with the reason
                # string produced by _match_entry's download-archive check,
                # otherwise --break-on-existing never triggers.
                if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
                    # TODO(review): uses bare print() instead of self.to_screen
                    # like every other message here -- confirm intent.
                    print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                    break
                else:
                    self.to_screen('[download] ' + reason)
                    continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Stamp the playlist-level metadata onto each legacy entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1129
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    # Thin wrapper around process_ie_result so that errors raised while
    # processing a single playlist entry are handled by
    # __handle_extraction_exceptions (reported or re-raised) instead of
    # aborting the whole playlist loop.
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)
1134
1135 def _build_format_filter(self, filter_spec):
1136 " Returns a function to filter the formats according to the filter_spec "
1137
1138 OPERATORS = {
1139 '<': operator.lt,
1140 '<=': operator.le,
1141 '>': operator.gt,
1142 '>=': operator.ge,
1143 '=': operator.eq,
1144 '!=': operator.ne,
1145 }
1146 operator_rex = re.compile(r'''(?x)\s*
1147 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1148 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1149 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1150 $
1151 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1152 m = operator_rex.search(filter_spec)
1153 if m:
1154 try:
1155 comparison_value = int(m.group('value'))
1156 except ValueError:
1157 comparison_value = parse_filesize(m.group('value'))
1158 if comparison_value is None:
1159 comparison_value = parse_filesize(m.group('value') + 'B')
1160 if comparison_value is None:
1161 raise ValueError(
1162 'Invalid value %r in format specification %r' % (
1163 m.group('value'), filter_spec))
1164 op = OPERATORS[m.group('op')]
1165
1166 if not m:
1167 STR_OPERATORS = {
1168 '=': operator.eq,
1169 '^=': lambda attr, value: attr.startswith(value),
1170 '$=': lambda attr, value: attr.endswith(value),
1171 '*=': lambda attr, value: value in attr,
1172 }
1173 str_operator_rex = re.compile(r'''(?x)
1174 \s*(?P<key>[a-zA-Z0-9._-]+)
1175 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1176 \s*(?P<value>[a-zA-Z0-9._-]+)
1177 \s*$
1178 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1179 m = str_operator_rex.search(filter_spec)
1180 if m:
1181 comparison_value = m.group('value')
1182 str_op = STR_OPERATORS[m.group('op')]
1183 if m.group('negation'):
1184 op = lambda attr, value: not str_op(attr, value)
1185 else:
1186 op = str_op
1187
1188 if not m:
1189 raise ValueError('Invalid filter specification %r' % filter_spec)
1190
1191 def _filter(f):
1192 actual_value = f.get(m.group('key'))
1193 if actual_value is None:
1194 return m.group('none_inclusive')
1195 return op(actual_value, comparison_value)
1196 return _filter
1197
1198 def _default_format_spec(self, info_dict, download=True):
1199
1200 def can_merge():
1201 merger = FFmpegMergerPP(self)
1202 return merger.available and merger.can_merge()
1203
1204 prefer_best = (
1205 not self.params.get('simulate', False)
1206 and download
1207 and (
1208 not can_merge()
1209 or info_dict.get('is_live', False)
1210 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1211
1212 return (
1213 'best/bestvideo+bestaudio'
1214 if prefer_best
1215 else 'bestvideo*+bestaudio/best'
1216 if not self.params.get('allow_multiple_audio_streams', False)
1217 else 'bestvideo+bestaudio/best')
1218
def build_format_selector(self, format_spec):
    """Compile a --format specification (e.g. 'bestvideo+bestaudio/best')
    into a selector function.

    The returned function takes a ctx dict (with 'formats' and
    'incomplete_formats' keys) and yields the selected format dicts.
    Parsing is done by tokenizing the spec with the stdlib tokenizer and
    running a small recursive-descent parser over the token stream.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError whose message points a caret at
        # column start[1] of the original spec
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node types of the parsed selector tree
    PICKFIRST = 'PICKFIRST'   # 'a/b' - first alternative with results
    MERGE = 'MERGE'           # 'a+b' - merge video+audio
    SINGLE = 'SINGLE'         # atom, e.g. 'best', 'bv', 'mp4', a format_id
    GROUP = 'GROUP'           # '( ... )'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Consume tokens up to the matching ']' and return the raw filter
        # text (later compiled by _build_format_filter)
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Glue consecutive name/number/op tokens into one NAME token
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parse of a comma-separated selector list;
        # the inside_* flags decide which operators end the current level
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter without a preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a selector tree node (or a top-level list of them) into a
        # generator function over ctx['formats']
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # Return the results of the first alternative that yields any
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            # NOTE(review): this local deliberately(?) shadows the outer
            # format_spec argument -- only the atom text is used below
            format_spec = selector.selector if selector.selector is not None else 'best'

            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f

            else:
                format_fallback = False
                # best/worst with optional video/audio qualifier and '*'
                # modifier, e.g. 'best', 'bv', 'wa*'
                format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                if format_spec_obj is not None:
                    format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                    format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                    not_format_type = 'v' if format_type == 'a' else 'a'
                    format_modified = format_spec_obj.group(3) is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                if format_type and format_modified  # bv*, ba*, wv*, wa*
                                else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                if format_type  # bv, ba, wv, wa
                                else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                if not format_modified  # b, w
                                else None)  # b*, w*
                else:
                    # Otherwise the atom is an extension or a format_id
                    format_idx = -1
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

        elif selector.type == MERGE:  # +
            def _merge(formats_pair):
                format_1, format_2 = formats_pair

                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                # Drop extra video/audio streams when multiple streams of a
                # kind are not allowed.
                # NOTE(review): formats_info.pop(i) while iterating
                # enumerate(formats_info) shifts later indices -- verify
                # this behaves as intended for >2 streams.
                if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                    get_no_more = {"video": False, "audio": False}
                    for (i, fmt_info) in enumerate(formats_info):
                        for aud_vid in ["audio", "video"]:
                            if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                if get_no_more[aud_vid]:
                                    formats_info.pop(i)
                                get_no_more[aud_vid] = True

                if len(formats_info) == 1:
                    return formats_info[0]

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                # Pick the container for the merged file
                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Cartesian product of both operands' selections; deep-copy
                # ctx so each side sees an unmodified format list
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        # Apply the node's '[...]' filters on a copy of ctx before selecting
        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Minimal iterator over the token list with one-token push-back
        # (restore_last_token), as required by the parser's lookahead
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__  # python 2 iterator protocol

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
1506
def _calc_headers(self, info_dict):
    """Compute the HTTP headers for this video/format: the defaults from
    std_headers, overridden by the extractor's http_headers, plus Cookie
    and X-Forwarded-For when applicable."""
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        headers['Cookie'] = cookies

    x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
    if x_forwarded_for_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = x_forwarded_for_ip

    return headers
1524
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would send for the
    video URL, or None if no cookies apply."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1529
1530 def process_video_result(self, info_dict, download=True):
1531 assert info_dict.get('_type', 'video') == 'video'
1532
1533 if 'id' not in info_dict:
1534 raise ExtractorError('Missing "id" field in extractor result')
1535 if 'title' not in info_dict:
1536 raise ExtractorError('Missing "title" field in extractor result')
1537
1538 def report_force_conversion(field, field_not, conversion):
1539 self.report_warning(
1540 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1541 % (field, field_not, conversion))
1542
1543 def sanitize_string_field(info, string_field):
1544 field = info.get(string_field)
1545 if field is None or isinstance(field, compat_str):
1546 return
1547 report_force_conversion(string_field, 'a string', 'string')
1548 info[string_field] = compat_str(field)
1549
1550 def sanitize_numeric_fields(info):
1551 for numeric_field in self._NUMERIC_FIELDS:
1552 field = info.get(numeric_field)
1553 if field is None or isinstance(field, compat_numeric_types):
1554 continue
1555 report_force_conversion(numeric_field, 'numeric', 'int')
1556 info[numeric_field] = int_or_none(field)
1557
1558 sanitize_string_field(info_dict, 'id')
1559 sanitize_numeric_fields(info_dict)
1560
1561 if 'playlist' not in info_dict:
1562 # It isn't part of a playlist
1563 info_dict['playlist'] = None
1564 info_dict['playlist_index'] = None
1565
1566 thumbnails = info_dict.get('thumbnails')
1567 if thumbnails is None:
1568 thumbnail = info_dict.get('thumbnail')
1569 if thumbnail:
1570 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1571 if thumbnails:
1572 thumbnails.sort(key=lambda t: (
1573 t.get('preference') if t.get('preference') is not None else -1,
1574 t.get('width') if t.get('width') is not None else -1,
1575 t.get('height') if t.get('height') is not None else -1,
1576 t.get('id') if t.get('id') is not None else '', t.get('url')))
1577 for i, t in enumerate(thumbnails):
1578 t['url'] = sanitize_url(t['url'])
1579 if t.get('width') and t.get('height'):
1580 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1581 if t.get('id') is None:
1582 t['id'] = '%d' % i
1583
1584 if self.params.get('list_thumbnails'):
1585 self.list_thumbnails(info_dict)
1586 return
1587
1588 thumbnail = info_dict.get('thumbnail')
1589 if thumbnail:
1590 info_dict['thumbnail'] = sanitize_url(thumbnail)
1591 elif thumbnails:
1592 info_dict['thumbnail'] = thumbnails[-1]['url']
1593
1594 if 'display_id' not in info_dict and 'id' in info_dict:
1595 info_dict['display_id'] = info_dict['id']
1596
1597 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1598 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1599 # see http://bugs.python.org/issue1646728)
1600 try:
1601 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1602 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1603 except (ValueError, OverflowError, OSError):
1604 pass
1605
1606 # Auto generate title fields corresponding to the *_number fields when missing
1607 # in order to always have clean titles. This is very common for TV series.
1608 for field in ('chapter', 'season', 'episode'):
1609 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1610 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1611
1612 for cc_kind in ('subtitles', 'automatic_captions'):
1613 cc = info_dict.get(cc_kind)
1614 if cc:
1615 for _, subtitle in cc.items():
1616 for subtitle_format in subtitle:
1617 if subtitle_format.get('url'):
1618 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1619 if subtitle_format.get('ext') is None:
1620 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1621
1622 automatic_captions = info_dict.get('automatic_captions')
1623 subtitles = info_dict.get('subtitles')
1624
1625 if self.params.get('listsubtitles', False):
1626 if 'automatic_captions' in info_dict:
1627 self.list_subtitles(
1628 info_dict['id'], automatic_captions, 'automatic captions')
1629 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1630 return
1631
1632 info_dict['requested_subtitles'] = self.process_subtitles(
1633 info_dict['id'], subtitles, automatic_captions)
1634
1635 # We now pick which formats have to be downloaded
1636 if info_dict.get('formats') is None:
1637 # There's only one format available
1638 formats = [info_dict]
1639 else:
1640 formats = info_dict['formats']
1641
1642 if not formats:
1643 raise ExtractorError('No video formats found!')
1644
1645 def is_wellformed(f):
1646 url = f.get('url')
1647 if not url:
1648 self.report_warning(
1649 '"url" field is missing or empty - skipping format, '
1650 'there is an error in extractor')
1651 return False
1652 if isinstance(url, bytes):
1653 sanitize_string_field(f, 'url')
1654 return True
1655
1656 # Filter out malformed formats for better extraction robustness
1657 formats = list(filter(is_wellformed, formats))
1658
1659 formats_dict = {}
1660
1661 # We check that all the formats have the format and format_id fields
1662 for i, format in enumerate(formats):
1663 sanitize_string_field(format, 'format_id')
1664 sanitize_numeric_fields(format)
1665 format['url'] = sanitize_url(format['url'])
1666 if not format.get('format_id'):
1667 format['format_id'] = compat_str(i)
1668 else:
1669 # Sanitize format_id from characters used in format selector expression
1670 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1671 format_id = format['format_id']
1672 if format_id not in formats_dict:
1673 formats_dict[format_id] = []
1674 formats_dict[format_id].append(format)
1675
1676 # Make sure all formats have unique format_id
1677 for format_id, ambiguous_formats in formats_dict.items():
1678 if len(ambiguous_formats) > 1:
1679 for i, format in enumerate(ambiguous_formats):
1680 format['format_id'] = '%s-%d' % (format_id, i)
1681
1682 for i, format in enumerate(formats):
1683 if format.get('format') is None:
1684 format['format'] = '{id} - {res}{note}'.format(
1685 id=format['format_id'],
1686 res=self.format_resolution(format),
1687 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1688 )
1689 # Automatically determine file extension if missing
1690 if format.get('ext') is None:
1691 format['ext'] = determine_ext(format['url']).lower()
1692 # Automatically determine protocol if missing (useful for format
1693 # selection purposes)
1694 if format.get('protocol') is None:
1695 format['protocol'] = determine_protocol(format)
1696 # Add HTTP headers, so that external programs can use them from the
1697 # json output
1698 full_format_info = info_dict.copy()
1699 full_format_info.update(format)
1700 format['http_headers'] = self._calc_headers(full_format_info)
1701 # Remove private housekeeping stuff
1702 if '__x_forwarded_for_ip' in info_dict:
1703 del info_dict['__x_forwarded_for_ip']
1704
1705 # TODO Central sorting goes here
1706
1707 if formats[0] is not info_dict:
1708 # only set the 'formats' fields if the original info_dict list them
1709 # otherwise we end up with a circular reference, the first (and unique)
1710 # element in the 'formats' field in info_dict is info_dict itself,
1711 # which can't be exported to json
1712 info_dict['formats'] = formats
1713 if self.params.get('listformats'):
1714 self.list_formats(info_dict)
1715 return
1716
1717 req_format = self.params.get('format')
1718 if req_format is None:
1719 req_format = self._default_format_spec(info_dict, download=download)
1720 if self.params.get('verbose'):
1721 self._write_string('[debug] Default format spec: %s\n' % req_format)
1722
1723 format_selector = self.build_format_selector(req_format)
1724
1725 # While in format selection we may need to have an access to the original
1726 # format set in order to calculate some metrics or do some processing.
1727 # For now we need to be able to guess whether original formats provided
1728 # by extractor are incomplete or not (i.e. whether extractor provides only
1729 # video-only or audio-only formats) for proper formats selection for
1730 # extractors with such incomplete formats (see
1731 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1732 # Since formats may be filtered during format selection and may not match
1733 # the original formats the results may be incorrect. Thus original formats
1734 # or pre-calculated metrics should be passed to format selection routines
1735 # as well.
1736 # We will pass a context object containing all necessary additional data
1737 # instead of just formats.
1738 # This fixes incorrect format selection issue (see
1739 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1740 incomplete_formats = (
1741 # All formats are video-only or
1742 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1743 # all formats are audio-only
1744 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1745
1746 ctx = {
1747 'formats': formats,
1748 'incomplete_formats': incomplete_formats,
1749 }
1750
1751 formats_to_download = list(format_selector(ctx))
1752 if not formats_to_download:
1753 raise ExtractorError('requested format not available',
1754 expected=True)
1755
1756 if download:
1757 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1758 if len(formats_to_download) > 1:
1759 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1760 for format in formats_to_download:
1761 new_info = dict(info_dict)
1762 new_info.update(format)
1763 self.process_info(new_info)
1764 # We update the info dict with the best quality format (backwards compatibility)
1765 info_dict.update(formats_to_download[-1])
1766 return info_dict
1767
1768 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1769 """Select the requested subtitles and their format"""
1770 available_subs = {}
1771 if normal_subtitles and self.params.get('writesubtitles'):
1772 available_subs.update(normal_subtitles)
1773 if automatic_captions and self.params.get('writeautomaticsub'):
1774 for lang, cap_info in automatic_captions.items():
1775 if lang not in available_subs:
1776 available_subs[lang] = cap_info
1777
1778 if (not self.params.get('writesubtitles') and not
1779 self.params.get('writeautomaticsub') or not
1780 available_subs):
1781 return None
1782
1783 if self.params.get('allsubtitles', False):
1784 requested_langs = available_subs.keys()
1785 else:
1786 if self.params.get('subtitleslangs', False):
1787 requested_langs = self.params.get('subtitleslangs')
1788 elif 'en' in available_subs:
1789 requested_langs = ['en']
1790 else:
1791 requested_langs = [list(available_subs.keys())[0]]
1792
1793 formats_query = self.params.get('subtitlesformat', 'best')
1794 formats_preference = formats_query.split('/') if formats_query else []
1795 subs = {}
1796 for lang in requested_langs:
1797 formats = available_subs.get(lang)
1798 if formats is None:
1799 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1800 continue
1801 for ext in formats_preference:
1802 if ext == 'best':
1803 f = formats[-1]
1804 break
1805 matches = list(filter(lambda f: f['ext'] == ext, formats))
1806 if matches:
1807 f = matches[-1]
1808 break
1809 else:
1810 f = formats[-1]
1811 self.report_warning(
1812 'No subtitle format found matching "%s" for language %s, '
1813 'using %s' % (formats_query, lang, f['ext']))
1814 subs[lang] = f
1815 return subs
1816
1817 def __forced_printings(self, info_dict, filename, incomplete):
1818 def print_mandatory(field):
1819 if (self.params.get('force%s' % field, False)
1820 and (not incomplete or info_dict.get(field) is not None)):
1821 self.to_stdout(info_dict[field])
1822
1823 def print_optional(field):
1824 if (self.params.get('force%s' % field, False)
1825 and info_dict.get(field) is not None):
1826 self.to_stdout(info_dict[field])
1827
1828 print_mandatory('title')
1829 print_mandatory('id')
1830 if self.params.get('forceurl', False) and not incomplete:
1831 if info_dict.get('requested_formats') is not None:
1832 for f in info_dict['requested_formats']:
1833 self.to_stdout(f['url'] + f.get('play_path', ''))
1834 else:
1835 # For RTMP URLs, also include the playpath
1836 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1837 print_optional('thumbnail')
1838 print_optional('description')
1839 if self.params.get('forcefilename', False) and filename is not None:
1840 self.to_stdout(filename)
1841 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1842 self.to_stdout(formatSeconds(info_dict['duration']))
1843 print_mandatory('format')
1844 if self.params.get('forcejson', False):
1845 self.to_stdout(json.dumps(info_dict))
1846
1847 def process_info(self, info_dict):
1848 """Process a single resolved IE result."""
1849
1850 assert info_dict.get('_type', 'video') == 'video'
1851
1852 max_downloads = self.params.get('max_downloads')
1853 if max_downloads is not None:
1854 if self._num_downloads >= int(max_downloads):
1855 raise MaxDownloadsReached()
1856
1857 # TODO: backward compatibility, to be removed
1858 info_dict['fulltitle'] = info_dict['title']
1859
1860 if 'format' not in info_dict:
1861 info_dict['format'] = info_dict['ext']
1862
1863 reason = self._match_entry(info_dict, incomplete=False)
1864 if reason is not None:
1865 self.to_screen('[download] ' + reason)
1866 return
1867
1868 self._num_downloads += 1
1869
1870 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1871
1872 # Forced printings
1873 self.__forced_printings(info_dict, filename, incomplete=False)
1874
1875 if self.params.get('simulate', False):
1876 if self.params.get('force_write_download_archive', False):
1877 self.record_download_archive(info_dict)
1878
1879 # Do nothing else if in simulate mode
1880 return
1881
1882 if filename is None:
1883 return
1884
1885 def ensure_dir_exists(path):
1886 try:
1887 dn = os.path.dirname(path)
1888 if dn and not os.path.exists(dn):
1889 os.makedirs(dn)
1890 return True
1891 except (OSError, IOError) as err:
1892 self.report_error('unable to create directory ' + error_to_compat_str(err))
1893 return False
1894
1895 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1896 return
1897
1898 if self.params.get('writedescription', False):
1899 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1900 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1901 self.to_screen('[info] Video description is already present')
1902 elif info_dict.get('description') is None:
1903 self.report_warning('There\'s no description to write.')
1904 else:
1905 try:
1906 self.to_screen('[info] Writing video description to: ' + descfn)
1907 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1908 descfile.write(info_dict['description'])
1909 except (OSError, IOError):
1910 self.report_error('Cannot write description file ' + descfn)
1911 return
1912
1913 if self.params.get('writeannotations', False):
1914 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1915 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1916 self.to_screen('[info] Video annotations are already present')
1917 elif not info_dict.get('annotations'):
1918 self.report_warning('There are no annotations to write.')
1919 else:
1920 try:
1921 self.to_screen('[info] Writing video annotations to: ' + annofn)
1922 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1923 annofile.write(info_dict['annotations'])
1924 except (KeyError, TypeError):
1925 self.report_warning('There are no annotations to write.')
1926 except (OSError, IOError):
1927 self.report_error('Cannot write annotations file: ' + annofn)
1928 return
1929
1930 def dl(name, info, subtitle=False):
1931 fd = get_suitable_downloader(info, self.params)(self, self.params)
1932 for ph in self._progress_hooks:
1933 fd.add_progress_hook(ph)
1934 if self.params.get('verbose'):
1935 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1936 return fd.download(name, info, subtitle)
1937
1938 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1939 self.params.get('writeautomaticsub')])
1940
1941 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1942 # subtitles download errors are already managed as troubles in relevant IE
1943 # that way it will silently go on when used with unsupporting IE
1944 subtitles = info_dict['requested_subtitles']
1945 # ie = self.get_info_extractor(info_dict['extractor_key'])
1946 for sub_lang, sub_info in subtitles.items():
1947 sub_format = sub_info['ext']
1948 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1949 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1950 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1951 else:
1952 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1953 if sub_info.get('data') is not None:
1954 try:
1955 # Use newline='' to prevent conversion of newline characters
1956 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1957 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1958 subfile.write(sub_info['data'])
1959 except (OSError, IOError):
1960 self.report_error('Cannot write subtitles file ' + sub_filename)
1961 return
1962 else:
1963 try:
1964 dl(sub_filename, sub_info, subtitle=True)
1965 '''
1966 if self.params.get('sleep_interval_subtitles', False):
1967 dl(sub_filename, sub_info)
1968 else:
1969 sub_data = ie._request_webpage(
1970 sub_info['url'], info_dict['id'], note=False).read()
1971 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1972 subfile.write(sub_data)
1973 '''
1974 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1975 self.report_warning('Unable to download subtitle for "%s": %s' %
1976 (sub_lang, error_to_compat_str(err)))
1977 continue
1978
1979 if self.params.get('skip_download', False):
1980 if self.params.get('convertsubtitles', False):
1981 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1982 filename_real_ext = os.path.splitext(filename)[1][1:]
1983 filename_wo_ext = (
1984 os.path.splitext(filename)[0]
1985 if filename_real_ext == info_dict['ext']
1986 else filename)
1987 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1988 if subconv.available:
1989 info_dict.setdefault('__postprocessors', [])
1990 # info_dict['__postprocessors'].append(subconv)
1991 if os.path.exists(encodeFilename(afilename)):
1992 self.to_screen(
1993 '[download] %s has already been downloaded and '
1994 'converted' % afilename)
1995 else:
1996 try:
1997 self.post_process(filename, info_dict)
1998 except (PostProcessingError) as err:
1999 self.report_error('postprocessing: %s' % str(err))
2000 return
2001
2002 if self.params.get('writeinfojson', False):
2003 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
2004 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
2005 self.to_screen('[info] Video description metadata is already present')
2006 else:
2007 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
2008 try:
2009 write_json_file(self.filter_requested_info(info_dict), infofn)
2010 except (OSError, IOError):
2011 self.report_error('Cannot write metadata to JSON file ' + infofn)
2012 return
2013
2014 self._write_thumbnails(info_dict, filename)
2015
2016 # Write internet shortcut files
2017 url_link = webloc_link = desktop_link = False
2018 if self.params.get('writelink', False):
2019 if sys.platform == "darwin": # macOS.
2020 webloc_link = True
2021 elif sys.platform.startswith("linux"):
2022 desktop_link = True
2023 else: # if sys.platform in ['win32', 'cygwin']:
2024 url_link = True
2025 if self.params.get('writeurllink', False):
2026 url_link = True
2027 if self.params.get('writewebloclink', False):
2028 webloc_link = True
2029 if self.params.get('writedesktoplink', False):
2030 desktop_link = True
2031
2032 if url_link or webloc_link or desktop_link:
2033 if 'webpage_url' not in info_dict:
2034 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2035 return
2036 ascii_url = iri_to_uri(info_dict['webpage_url'])
2037
2038 def _write_link_file(extension, template, newline, embed_filename):
2039 linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2040 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2041 self.to_screen('[info] Internet shortcut is already present')
2042 else:
2043 try:
2044 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2045 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2046 template_vars = {'url': ascii_url}
2047 if embed_filename:
2048 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2049 linkfile.write(template % template_vars)
2050 except (OSError, IOError):
2051 self.report_error('Cannot write internet shortcut ' + linkfn)
2052 return False
2053 return True
2054
2055 if url_link:
2056 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2057 return
2058 if webloc_link:
2059 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2060 return
2061 if desktop_link:
2062 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2063 return
2064
2065 # Download
2066 must_record_download_archive = False
2067 if not self.params.get('skip_download', False):
2068 try:
2069 if info_dict.get('requested_formats') is not None:
2070 downloaded = []
2071 success = True
2072 merger = FFmpegMergerPP(self)
2073 if not merger.available:
2074 postprocessors = []
2075 self.report_warning('You have requested multiple '
2076 'formats but ffmpeg or avconv are not installed.'
2077 ' The formats won\'t be merged.')
2078 else:
2079 postprocessors = [merger]
2080
2081 def compatible_formats(formats):
2082 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2083 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2084 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2085 if len(video_formats) > 2 or len(audio_formats) > 2:
2086 return False
2087
2088 # Check extension
2089 exts = set(format.get('ext') for format in formats)
2090 COMPATIBLE_EXTS = (
2091 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2092 set(('webm',)),
2093 )
2094 for ext_sets in COMPATIBLE_EXTS:
2095 if ext_sets.issuperset(exts):
2096 return True
2097 # TODO: Check acodec/vcodec
2098 return False
2099
2100 filename_real_ext = os.path.splitext(filename)[1][1:]
2101 filename_wo_ext = (
2102 os.path.splitext(filename)[0]
2103 if filename_real_ext == info_dict['ext']
2104 else filename)
2105 requested_formats = info_dict['requested_formats']
2106 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2107 info_dict['ext'] = 'mkv'
2108 self.report_warning(
2109 'Requested formats are incompatible for merge and will be merged into mkv.')
2110 # Ensure filename always has a correct extension for successful merge
2111 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2112 if os.path.exists(encodeFilename(filename)):
2113 self.to_screen(
2114 '[download] %s has already been downloaded and '
2115 'merged' % filename)
2116 else:
2117 for f in requested_formats:
2118 new_info = dict(info_dict)
2119 new_info.update(f)
2120 fname = prepend_extension(
2121 self.prepare_filename(new_info),
2122 'f%s' % f['format_id'], new_info['ext'])
2123 if not ensure_dir_exists(fname):
2124 return
2125 downloaded.append(fname)
2126 partial_success, real_download = dl(fname, new_info)
2127 success = success and partial_success
2128 info_dict['__postprocessors'] = postprocessors
2129 info_dict['__files_to_merge'] = downloaded
2130 # Even if there were no downloads, it is being merged only now
2131 info_dict['__real_download'] = True
2132 else:
2133 # Just a single file
2134 success, real_download = dl(filename, info_dict)
2135 info_dict['__real_download'] = real_download
2136 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2137 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2138 return
2139 except (OSError, IOError) as err:
2140 raise UnavailableVideoError(err)
2141 except (ContentTooShortError, ) as err:
2142 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2143 return
2144
2145 if success and filename != '-':
2146 # Fixup content
2147 fixup_policy = self.params.get('fixup')
2148 if fixup_policy is None:
2149 fixup_policy = 'detect_or_warn'
2150
2151 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2152
2153 stretched_ratio = info_dict.get('stretched_ratio')
2154 if stretched_ratio is not None and stretched_ratio != 1:
2155 if fixup_policy == 'warn':
2156 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2157 info_dict['id'], stretched_ratio))
2158 elif fixup_policy == 'detect_or_warn':
2159 stretched_pp = FFmpegFixupStretchedPP(self)
2160 if stretched_pp.available:
2161 info_dict.setdefault('__postprocessors', [])
2162 info_dict['__postprocessors'].append(stretched_pp)
2163 else:
2164 self.report_warning(
2165 '%s: Non-uniform pixel ratio (%s). %s'
2166 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2167 else:
2168 assert fixup_policy in ('ignore', 'never')
2169
2170 if (info_dict.get('requested_formats') is None
2171 and info_dict.get('container') == 'm4a_dash'):
2172 if fixup_policy == 'warn':
2173 self.report_warning(
2174 '%s: writing DASH m4a. '
2175 'Only some players support this container.'
2176 % info_dict['id'])
2177 elif fixup_policy == 'detect_or_warn':
2178 fixup_pp = FFmpegFixupM4aPP(self)
2179 if fixup_pp.available:
2180 info_dict.setdefault('__postprocessors', [])
2181 info_dict['__postprocessors'].append(fixup_pp)
2182 else:
2183 self.report_warning(
2184 '%s: writing DASH m4a. '
2185 'Only some players support this container. %s'
2186 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2187 else:
2188 assert fixup_policy in ('ignore', 'never')
2189
2190 if (info_dict.get('protocol') == 'm3u8_native'
2191 or info_dict.get('protocol') == 'm3u8'
2192 and self.params.get('hls_prefer_native')):
2193 if fixup_policy == 'warn':
2194 self.report_warning('%s: malformed AAC bitstream detected.' % (
2195 info_dict['id']))
2196 elif fixup_policy == 'detect_or_warn':
2197 fixup_pp = FFmpegFixupM3u8PP(self)
2198 if fixup_pp.available:
2199 info_dict.setdefault('__postprocessors', [])
2200 info_dict['__postprocessors'].append(fixup_pp)
2201 else:
2202 self.report_warning(
2203 '%s: malformed AAC bitstream detected. %s'
2204 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2205 else:
2206 assert fixup_policy in ('ignore', 'never')
2207
2208 try:
2209 self.post_process(filename, info_dict)
2210 except (PostProcessingError) as err:
2211 self.report_error('postprocessing: %s' % str(err))
2212 return
2213 try:
2214 for ph in self._post_hooks:
2215 ph(filename)
2216 except Exception as err:
2217 self.report_error('post hooks: %s' % str(err))
2218 return
2219 must_record_download_archive = True
2220
2221 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2222 self.record_download_archive(info_dict)
2223 max_downloads = self.params.get('max_downloads')
2224 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2225 raise MaxDownloadsReached()
2226
2227 def download(self, url_list):
2228 """Download a given list of URLs."""
2229 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2230 if (len(url_list) > 1
2231 and outtmpl != '-'
2232 and '%' not in outtmpl
2233 and self.params.get('max_downloads') != 1):
2234 raise SameFileError(outtmpl)
2235
2236 for url in url_list:
2237 try:
2238 # It also downloads the videos
2239 res = self.extract_info(
2240 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2241 except UnavailableVideoError:
2242 self.report_error('unable to download video')
2243 except MaxDownloadsReached:
2244 self.to_screen('[info] Maximum number of downloaded files reached.')
2245 raise
2246 else:
2247 if self.params.get('dump_single_json', False):
2248 self.to_stdout(json.dumps(res))
2249
2250 return self._download_retcode
2251
2252 def download_with_info_file(self, info_filename):
2253 with contextlib.closing(fileinput.FileInput(
2254 [info_filename], mode='r',
2255 openhook=fileinput.hook_encoded('utf-8'))) as f:
2256 # FileInput doesn't have a read method, we can't call json.load
2257 info = self.filter_requested_info(json.loads('\n'.join(f)))
2258 try:
2259 self.process_ie_result(info, download=True)
2260 except DownloadError:
2261 webpage_url = info.get('webpage_url')
2262 if webpage_url is not None:
2263 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2264 return self.download([webpage_url])
2265 else:
2266 raise
2267 return self._download_retcode
2268
2269 @staticmethod
2270 def filter_requested_info(info_dict):
2271 return dict(
2272 (k, v) for k, v in info_dict.items()
2273 if k not in ['requested_formats', 'requested_subtitles'])
2274
2275 def post_process(self, filename, ie_info):
2276 """Run all the postprocessors on the given file."""
2277 info = dict(ie_info)
2278 info['filepath'] = filename
2279 pps_chain = []
2280 if ie_info.get('__postprocessors') is not None:
2281 pps_chain.extend(ie_info['__postprocessors'])
2282 pps_chain.extend(self._pps)
2283 for pp in pps_chain:
2284 files_to_delete = []
2285 try:
2286 files_to_delete, info = pp.run(info)
2287 except PostProcessingError as e:
2288 self.report_error(e.msg)
2289 if files_to_delete and not self.params.get('keepvideo', False):
2290 for old_filename in set(files_to_delete):
2291 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2292 try:
2293 os.remove(encodeFilename(old_filename))
2294 except (IOError, OSError):
2295 self.report_warning('Unable to remove downloaded original file')
2296
2297 def _make_archive_id(self, info_dict):
2298 video_id = info_dict.get('id')
2299 if not video_id:
2300 return
2301 # Future-proof against any change in case
2302 # and backwards compatibility with prior versions
2303 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2304 if extractor is None:
2305 url = str_or_none(info_dict.get('url'))
2306 if not url:
2307 return
2308 # Try to find matching extractor for the URL and take its ie_key
2309 for ie in self._ies:
2310 if ie.suitable(url):
2311 extractor = ie.ie_key()
2312 break
2313 else:
2314 return
2315 return extractor.lower() + ' ' + video_id
2316
2317 def in_download_archive(self, info_dict):
2318 fn = self.params.get('download_archive')
2319 if fn is None:
2320 return False
2321
2322 vid_id = self._make_archive_id(info_dict)
2323 if not vid_id:
2324 return False # Incomplete video information
2325
2326 return vid_id in self.archive
2327
2328 def record_download_archive(self, info_dict):
2329 fn = self.params.get('download_archive')
2330 if fn is None:
2331 return
2332 vid_id = self._make_archive_id(info_dict)
2333 assert vid_id
2334 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2335 archive_file.write(vid_id + '\n')
2336 self.archive.add(vid_id)
2337
2338 @staticmethod
2339 def format_resolution(format, default='unknown'):
2340 if format.get('vcodec') == 'none':
2341 return 'audio only'
2342 if format.get('resolution') is not None:
2343 return format['resolution']
2344 if format.get('height') is not None:
2345 if format.get('width') is not None:
2346 res = '%sx%s' % (format['width'], format['height'])
2347 else:
2348 res = '%sp' % format['height']
2349 elif format.get('width') is not None:
2350 res = '%dx?' % format['width']
2351 else:
2352 res = default
2353 return res
2354
2355 def _format_note(self, fdict):
2356 res = ''
2357 if fdict.get('ext') in ['f4f', 'f4m']:
2358 res += '(unsupported) '
2359 if fdict.get('language'):
2360 if res:
2361 res += ' '
2362 res += '[%s] ' % fdict['language']
2363 if fdict.get('format_note') is not None:
2364 res += fdict['format_note'] + ' '
2365 if fdict.get('tbr') is not None:
2366 res += '%4dk ' % fdict['tbr']
2367 if fdict.get('container') is not None:
2368 if res:
2369 res += ', '
2370 res += '%s container' % fdict['container']
2371 if (fdict.get('vcodec') is not None
2372 and fdict.get('vcodec') != 'none'):
2373 if res:
2374 res += ', '
2375 res += fdict['vcodec']
2376 if fdict.get('vbr') is not None:
2377 res += '@'
2378 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2379 res += 'video@'
2380 if fdict.get('vbr') is not None:
2381 res += '%4dk' % fdict['vbr']
2382 if fdict.get('fps') is not None:
2383 if res:
2384 res += ', '
2385 res += '%sfps' % fdict['fps']
2386 if fdict.get('acodec') is not None:
2387 if res:
2388 res += ', '
2389 if fdict['acodec'] == 'none':
2390 res += 'video only'
2391 else:
2392 res += '%-5s' % fdict['acodec']
2393 elif fdict.get('abr') is not None:
2394 if res:
2395 res += ', '
2396 res += 'audio'
2397 if fdict.get('abr') is not None:
2398 res += '@%3dk' % fdict['abr']
2399 if fdict.get('asr') is not None:
2400 res += ' (%5dHz)' % fdict['asr']
2401 if fdict.get('filesize') is not None:
2402 if res:
2403 res += ', '
2404 res += format_bytes(fdict['filesize'])
2405 elif fdict.get('filesize_approx') is not None:
2406 if res:
2407 res += ', '
2408 res += '~' + format_bytes(fdict['filesize_approx'])
2409 return res
2410
2411 def _format_note_table(self, f):
2412 def join_fields(*vargs):
2413 return ', '.join((val for val in vargs if val != ''))
2414
2415 return join_fields(
2416 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2417 format_field(f, 'language', '[%s]'),
2418 format_field(f, 'format_note'),
2419 format_field(f, 'container', ignore=(None, f.get('ext'))),
2420 format_field(f, 'asr', '%5dHz'))
2421
2422 def list_formats(self, info_dict):
2423 formats = info_dict.get('formats', [info_dict])
2424 new_format = self.params.get('listformats_table', False)
2425 if new_format:
2426 table = [
2427 [
2428 format_field(f, 'format_id'),
2429 format_field(f, 'ext'),
2430 self.format_resolution(f),
2431 format_field(f, 'fps', '%d'),
2432 '|',
2433 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2434 format_field(f, 'tbr', '%4dk'),
2435 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2436 '|',
2437 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2438 format_field(f, 'vbr', '%4dk'),
2439 format_field(f, 'acodec', default='unknown').replace('none', ''),
2440 format_field(f, 'abr', '%3dk'),
2441 format_field(f, 'asr', '%5dHz'),
2442 self._format_note_table(f)]
2443 for f in formats
2444 if f.get('preference') is None or f['preference'] >= -1000]
2445 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2446 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2447 else:
2448 table = [
2449 [
2450 format_field(f, 'format_id'),
2451 format_field(f, 'ext'),
2452 self.format_resolution(f),
2453 self._format_note(f)]
2454 for f in formats
2455 if f.get('preference') is None or f['preference'] >= -1000]
2456 header_line = ['format code', 'extension', 'resolution', 'note']
2457
2458 # if len(formats) > 1:
2459 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2460 self.to_screen(
2461 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2462 header_line,
2463 table,
2464 delim=new_format,
2465 extraGap=(0 if new_format else 1),
2466 hideEmpty=new_format)))
2467
2468 def list_thumbnails(self, info_dict):
2469 thumbnails = info_dict.get('thumbnails')
2470 if not thumbnails:
2471 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2472 return
2473
2474 self.to_screen(
2475 '[info] Thumbnails for %s:' % info_dict['id'])
2476 self.to_screen(render_table(
2477 ['ID', 'width', 'height', 'URL'],
2478 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2479
2480 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2481 if not subtitles:
2482 self.to_screen('%s has no %s' % (video_id, name))
2483 return
2484 self.to_screen(
2485 'Available %s for %s:' % (name, video_id))
2486 self.to_screen(render_table(
2487 ['Language', 'formats'],
2488 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2489 for lang, formats in subtitles.items()]))
2490
2491 def urlopen(self, req):
2492 """ Start an HTTP download """
2493 if isinstance(req, compat_basestring):
2494 req = sanitized_Request(req)
2495 return self._opener.open(req, timeout=self._socket_timeout)
2496
    def print_debug_header(self):
        """Write verbose diagnostic info (encodings, versions, proxies, and
        optionally the public IP / update check) to the debug output.
        No-op unless the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout can be replaced by an object without an 'encoding'
        # attribute (e.g. redirected output), hence the getattr fallback.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best-effort: report the git commit when running from a checkout.
        # Failures (no git, not a repo) are silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence.
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # On PyPy, append the actual PyPy version since
            # platform.python_version() reports the emulated CPython version.
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Versions of external programs this tool may shell out to.
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy mapping across all opener handlers.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in only: contacts yt-dl.org to report the public IP and
            # check whether a newer release is available.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2572
    def _setup_opener(self):
        """Create the urllib opener used for all HTTP(S) requests and store
        it in self._opener. Wires up cookies, proxies, HTTPS handling,
        redirects, data: URLs, and disables the file:// scheme."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a 10 minute socket timeout when none is configured.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file configured: use an in-memory cookie jar.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Only load the file when it is readable; a missing file is fine.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicitly empty proxy option disables proxying entirely.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to environment proxy settings (http_proxy etc.).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2625
2626 def encode(self, s):
2627 if isinstance(s, bytes):
2628 return s # Already encoded
2629
2630 try:
2631 return s.encode(self.get_encoding())
2632 except UnicodeEncodeError as err:
2633 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2634 raise
2635
2636 def get_encoding(self):
2637 encoding = self.params.get('encoding')
2638 if encoding is None:
2639 encoding = preferredencoding()
2640 return encoding
2641
2642 def _write_thumbnails(self, info_dict, filename):
2643 if self.params.get('writethumbnail', False):
2644 thumbnails = info_dict.get('thumbnails')
2645 if thumbnails:
2646 thumbnails = [thumbnails[-1]]
2647 elif self.params.get('write_all_thumbnails', False):
2648 thumbnails = info_dict.get('thumbnails')
2649 else:
2650 return
2651
2652 if not thumbnails:
2653 # No thumbnails present, so return immediately
2654 return
2655
2656 for t in thumbnails:
2657 thumb_ext = determine_ext(t['url'], 'jpg')
2658 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2659 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2660 t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2661
2662 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2663 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2664 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2665 else:
2666 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2667 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2668 try:
2669 uf = self.urlopen(t['url'])
2670 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2671 shutil.copyfileobj(uf, thumbf)
2672 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2673 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2674 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2675 self.report_warning('Unable to download thumbnail "%s": %s' %
2676 (t['url'], error_to_compat_str(err)))