#!/usr/bin/env python
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
import tokenize
import traceback
import random

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_cookiejar,
    compat_get_terminal_size,
    compat_http_client,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    ExtractorError,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    locked_file,
    make_HTTPS_handler,
    MaxDownloadsReached,
    orderedSet,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    subtitles_filename,
    to_high_limit_path,
    UnavailableVideoError,
    url_basename,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieJar,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    process_communicate_or_kill,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegSubtitlesConvertorPP,
    get_postprocessor,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
125 """YoutubeDL class.
126
127 YoutubeDL objects are the ones responsible of downloading the
128 actual video file and writing it to disk if the user has requested
129 it, among some other tasks. In most cases there should be one per
130 program. As, given a video URL, the downloader doesn't know how to
131 extract all the needed information, task that InfoExtractors do, it
132 has to pass the URL to one of them.
133
134 For this, YoutubeDL objects have a method that allows
135 InfoExtractors to be registered in a given order. When it is passed
136 a URL, the YoutubeDL object handles it to the first InfoExtractor it
137 finds that reports being able to handle it. The InfoExtractor extracts
138 all the information about the video or videos the URL refers to, and
139 YoutubeDL process the extracted information, possibly using a File
140 Downloader to download the video.
141
142 YoutubeDL objects accept a lot of parameters. In order not to saturate
143 the object constructor with arguments, it receives a dictionary of
144 options instead. These options are available through the params
145 attribute for the InfoExtractors to use. The YoutubeDL also
146 registers itself as the downloader in charge for the InfoExtractors
147 that are added to it, so this is a "mutual registration".
148
149 Available options:
150
    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless of
                       'skip_download' or 'simulate'.
    simulate:          Do not download the video files.
    format:            Video format code. See "FORMAT SELECTION" for more details.
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file.
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names.
    trim_file_name:    Limit length of filename (extension excluded).
    ignoreerrors:      Do not stop on download errors. (Default True when
                       running youtube-dlc, but False when directly accessing
                       the YoutubeDL class)
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that's in the archive.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                              youtube_dlc/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    post_hooks:        A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dlc servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see youtube_dlc/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
                       of additional command-line arguments for the postprocessor.
                       Use 'default' as the name for arguments to be passed to all PPs.

    The following options are used by the Youtube extractor:
    youtube_include_dash_manifest: If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH.
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

392 """Preload the archive, if any is specified"""
393 def preload_download_archive(self):
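            # The archive is read as opaque lines; by convention each line is
            # an '<extractor> <video id>' pair (see in_download_archive).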
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['youtube-dlc']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

    def _bidi_workaround(self, message):
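        # Pipe the message through the bidiv/fribidi subprocess started in
        # __init__ (only present when the bidi_workaround option is set) and
        # read the reordered text back from the pty.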
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may raise an exception or
        simply set the return code when errors are found, after printing
        the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
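            # (e.g. %(playlist_index)s becomes %(playlist_index)03d for a
            # playlist with 100-999 entries)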
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We patch the output
            # template for missing fields to use the string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly, which is not what we want, since we need to keep
            # '%%' intact for the template dict substitution step. Work around
            # this with a boundary-like separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to work around encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict, incomplete):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break

            return self.__extract_info(url, ie, download, extra_info, process, info_dict)

        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
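        # Decorator for the extraction entry points below (__extract_info and
        # __process_iterable_entry): expected failures are reported via
        # report_error; unexpected ones are re-raised unless 'ignoreerrors'
        # is set (MaxDownloadsReached always propagates).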
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if info_dict:
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'duration_string': (
                formatSeconds(ie_result['duration'], '-')
                if ie_result.get('duration', None) is not None
                else None),
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        })

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error; don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather a url or
            # url_transparent result. In such cases outer metadata (from
            # ie_result) should be propagated to the inner one (info). For
            # this to happen, _type of info should be overridden with
            # url_transparent. This fixes the issue from
            # https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
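                # playlist_items is a comma-separated list of indices and
                # inclusive ranges, e.g. '1-3,7' selects entries 1, 2, 3
                # and 7 (1-based).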
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing'):
                        self.to_screen('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                        break
                    else:
                        self.to_screen('[download] ' + reason)
                        continue

                entry_result = self.__process_iterable_entry(entry, download, extra)
                # TODO: skip failed (empty) entries?
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

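        # Filter specs handled here (illustrative): numeric comparisons such
        # as 'height<=720' or 'filesize>10M', and string comparisons such as
        # 'ext=mp4' ('^=', '$=', '*=' match prefix/suffix/substring, and a
        # leading '!' negates the operator). A '?' right after the operator
        # (e.g. 'height<=?720') also keeps formats where the field is missing.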
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>[a-zA-Z0-9._-]+)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate', False)
            and download
            and (
                not can_merge()
                or info_dict.get('is_live', False)
                or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))

        return (
            'best/bestvideo+bestaudio'
            if prefer_best
            else 'bestvideo*+bestaudio/best'
            if not self.params.get('allow_multiple_audio_streams', False)
            else 'bestvideo+bestaudio/best')

    def build_format_selector(self, format_spec):
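        # format_spec grammar handled by this parser (illustrative examples):
        #   'best'                            a single selector
        #   'bestvideo+bestaudio'             merge two formats ('+')
        #   'mp4/webm'                        first alternative that matches ('/')
        #   '(bestvideo/best)[height<=720]'   grouping and filters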
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    format_fallback = False
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if matches:
                            yield matches[format_idx]
                        elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            yield formats[format_idx]

            elif selector.type == MERGE:  # +
                def _merge(formats_pair):
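                    # Combine one video and one audio format (or more, when
                    # multiple streams are allowed) into a single info dict,
                    # exposing the parts via 'requested_formats' for the
                    # downloader and the merge postprocessor.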
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {"video": False, "audio": False}
                        kept_formats_info = []
                        for fmt_info in formats_info:
                            keep = True
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        keep = False
                                    get_no_more[aud_vid] = True
                            if keep:
                                kept_formats_info.append(fmt_info)
                        formats_info = kept_formats_info

                    if len(formats_info) == 1:
                        return formats_info[0]

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

1494 class TokenIterator(object):
1495 def __init__(self, tokens):
1496 self.tokens = tokens
1497 self.counter = 0
1498
1499 def __iter__(self):
1500 return self
1501
1502 def __next__(self):
1503 if self.counter >= len(self.tokens):
1504 raise StopIteration()
1505 value = self.tokens[self.counter]
1506 self.counter += 1
1507 return value
1508
1509 next = __next__
1510
1511 def restore_last_token(self):
1512 self.counter -= 1
1513
1514 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1515 return _build_selector_function(parsed_selector)
1516
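# Minimal usage sketch (assumes an already-constructed YoutubeDL instance):
#   >>> selector = ydl.build_format_selector('best[height<=720]/best')
#   >>> ctx = {'formats': info_dict['formats'], 'incomplete_formats': False}
#   >>> formats_to_download = list(selector(ctx))
# The spec is tokenized with the stdlib tokenize module and parsed into a
# selector tree (PICKFIRST for '/', MERGE for '+', GROUP for parentheses)
# before being compiled into the generator function returned above.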
1517 def _calc_headers(self, info_dict):
1518 res = std_headers.copy()
1519
1520 add_headers = info_dict.get('http_headers')
1521 if add_headers:
1522 res.update(add_headers)
1523
1524 cookies = self._calc_cookies(info_dict)
1525 if cookies:
1526 res['Cookie'] = cookies
1527
1528 if 'X-Forwarded-For' not in res:
1529 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1530 if x_forwarded_for_ip:
1531 res['X-Forwarded-For'] = x_forwarded_for_ip
1532
1533 return res
1534
1535 def _calc_cookies(self, info_dict):
1536 pr = sanitized_Request(info_dict['url'])
1537 self.cookiejar.add_cookie_header(pr)
1538 return pr.get_header('Cookie')
1539
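# Worked example (hypothetical values): with a User-Agent in std_headers,
# an extractor-supplied {'Referer': ...} in http_headers and a matching
# cookie in self.cookiejar, _calc_headers() returns roughly
#   {'User-Agent': '...', 'Referer': '...', 'Cookie': 'sid=abc123'}
# plus an 'X-Forwarded-For' entry when geo-bypass code stored
# '__x_forwarded_for_ip' in the info dict.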
1540 def process_video_result(self, info_dict, download=True):
1541 assert info_dict.get('_type', 'video') == 'video'
1542
1543 if 'id' not in info_dict:
1544 raise ExtractorError('Missing "id" field in extractor result')
1545 if 'title' not in info_dict:
1546 raise ExtractorError('Missing "title" field in extractor result')
1547
1548 def report_force_conversion(field, field_not, conversion):
1549 self.report_warning(
1550 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1551 % (field, field_not, conversion))
1552
1553 def sanitize_string_field(info, string_field):
1554 field = info.get(string_field)
1555 if field is None or isinstance(field, compat_str):
1556 return
1557 report_force_conversion(string_field, 'a string', 'string')
1558 info[string_field] = compat_str(field)
1559
1560 def sanitize_numeric_fields(info):
1561 for numeric_field in self._NUMERIC_FIELDS:
1562 field = info.get(numeric_field)
1563 if field is None or isinstance(field, compat_numeric_types):
1564 continue
1565 report_force_conversion(numeric_field, 'numeric', 'int')
1566 info[numeric_field] = int_or_none(field)
1567
1568 sanitize_string_field(info_dict, 'id')
1569 sanitize_numeric_fields(info_dict)
1570
1571 if 'playlist' not in info_dict:
1572 # It isn't part of a playlist
1573 info_dict['playlist'] = None
1574 info_dict['playlist_index'] = None
1575
1576 thumbnails = info_dict.get('thumbnails')
1577 if thumbnails is None:
1578 thumbnail = info_dict.get('thumbnail')
1579 if thumbnail:
1580 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1581 if thumbnails:
1582 thumbnails.sort(key=lambda t: (
1583 t.get('preference') if t.get('preference') is not None else -1,
1584 t.get('width') if t.get('width') is not None else -1,
1585 t.get('height') if t.get('height') is not None else -1,
1586 t.get('id') if t.get('id') is not None else '', t.get('url')))
1587 for i, t in enumerate(thumbnails):
1588 t['url'] = sanitize_url(t['url'])
1589 if t.get('width') and t.get('height'):
1590 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1591 if t.get('id') is None:
1592 t['id'] = '%d' % i
1593
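# Sorting example (illustrative): given
#   [{'url': 'a.jpg', 'width': 120}, {'url': 'b.jpg', 'preference': 1}]
# the thumbnail with the explicit preference sorts last (i.e. best), so
# the fallback below picks thumbnails[-1]['url'] == 'b.jpg'.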
1594 if self.params.get('list_thumbnails'):
1595 self.list_thumbnails(info_dict)
1596 return
1597
1598 thumbnail = info_dict.get('thumbnail')
1599 if thumbnail:
1600 info_dict['thumbnail'] = sanitize_url(thumbnail)
1601 elif thumbnails:
1602 info_dict['thumbnail'] = thumbnails[-1]['url']
1603
1604 if 'display_id' not in info_dict and 'id' in info_dict:
1605 info_dict['display_id'] = info_dict['id']
1606
1607 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1608 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1609 # see http://bugs.python.org/issue1646728)
1610 try:
1611 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1612 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1613 except (ValueError, OverflowError, OSError):
1614 pass
1615
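# Example: a timestamp of 1577836800 (2020-01-01T00:00:00Z) yields
# upload_date == '20200101'; out-of-range values are simply left unset.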
# Auto-generate title fields corresponding to the *_number fields when
# missing, in order to always have clean titles. This is very common for
# TV series.
1618 for field in ('chapter', 'season', 'episode'):
1619 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1620 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1621
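# Example: {'episode_number': 3} with no 'episode' field gains
# info_dict['episode'] == 'Episode 3'; likewise for chapter and season.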
1622 for cc_kind in ('subtitles', 'automatic_captions'):
1623 cc = info_dict.get(cc_kind)
1624 if cc:
1625 for _, subtitle in cc.items():
1626 for subtitle_format in subtitle:
1627 if subtitle_format.get('url'):
1628 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1629 if subtitle_format.get('ext') is None:
1630 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1631
1632 automatic_captions = info_dict.get('automatic_captions')
1633 subtitles = info_dict.get('subtitles')
1634
1635 if self.params.get('listsubtitles', False):
1636 if 'automatic_captions' in info_dict:
1637 self.list_subtitles(
1638 info_dict['id'], automatic_captions, 'automatic captions')
1639 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1640 return
1641
1642 info_dict['requested_subtitles'] = self.process_subtitles(
1643 info_dict['id'], subtitles, automatic_captions)
1644
1645 # We now pick which formats have to be downloaded
1646 if info_dict.get('formats') is None:
1647 # There's only one format available
1648 formats = [info_dict]
1649 else:
1650 formats = info_dict['formats']
1651
1652 if not formats:
1653 raise ExtractorError('No video formats found!')
1654
1655 def is_wellformed(f):
1656 url = f.get('url')
1657 if not url:
1658 self.report_warning(
1659 '"url" field is missing or empty - skipping format, '
1660 'there is an error in extractor')
1661 return False
1662 if isinstance(url, bytes):
1663 sanitize_string_field(f, 'url')
1664 return True
1665
1666 # Filter out malformed formats for better extraction robustness
1667 formats = list(filter(is_wellformed, formats))
1668
1669 formats_dict = {}
1670
1671 # We check that all the formats have the format and format_id fields
1672 for i, format in enumerate(formats):
1673 sanitize_string_field(format, 'format_id')
1674 sanitize_numeric_fields(format)
1675 format['url'] = sanitize_url(format['url'])
1676 if not format.get('format_id'):
1677 format['format_id'] = compat_str(i)
1678 else:
1679 # Sanitize format_id from characters used in format selector expression
1680 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1681 format_id = format['format_id']
1682 if format_id not in formats_dict:
1683 formats_dict[format_id] = []
1684 formats_dict[format_id].append(format)
1685
1686 # Make sure all formats have unique format_id
1687 for format_id, ambiguous_formats in formats_dict.items():
1688 if len(ambiguous_formats) > 1:
1689 for i, format in enumerate(ambiguous_formats):
1690 format['format_id'] = '%s-%d' % (format_id, i)
1691
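# Example: two formats that both report format_id 'hls' are renamed to
# 'hls-0' and 'hls-1', so a format selector such as 'hls-1' stays
# unambiguous.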
1692 for i, format in enumerate(formats):
1693 if format.get('format') is None:
1694 format['format'] = '{id} - {res}{note}'.format(
1695 id=format['format_id'],
1696 res=self.format_resolution(format),
1697 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1698 )
1699 # Automatically determine file extension if missing
1700 if format.get('ext') is None:
1701 format['ext'] = determine_ext(format['url']).lower()
1702 # Automatically determine protocol if missing (useful for format
1703 # selection purposes)
1704 if format.get('protocol') is None:
1705 format['protocol'] = determine_protocol(format)
1706 # Add HTTP headers, so that external programs can use them from the
1707 # json output
1708 full_format_info = info_dict.copy()
1709 full_format_info.update(format)
1710 format['http_headers'] = self._calc_headers(full_format_info)
1711 # Remove private housekeeping stuff
1712 if '__x_forwarded_for_ip' in info_dict:
1713 del info_dict['__x_forwarded_for_ip']
1714
1715 # TODO Central sorting goes here
1716
1717 if formats[0] is not info_dict:
# only set the 'formats' field if the original info_dict listed formats;
# otherwise we end up with a circular reference: the first (and only)
# element of the 'formats' field in info_dict would be info_dict itself,
# which can't be exported to json
1722 info_dict['formats'] = formats
1723 if self.params.get('listformats'):
1724 self.list_formats(info_dict)
1725 return
1726
1727 req_format = self.params.get('format')
1728 if req_format is None:
1729 req_format = self._default_format_spec(info_dict, download=download)
1730 if self.params.get('verbose'):
1731 self._write_string('[debug] Default format spec: %s\n' % req_format)
1732
1733 format_selector = self.build_format_selector(req_format)
1734
# During format selection we may need access to the original format set
# in order to calculate some metrics or do some processing.
# For now we need to be able to guess whether the original formats
# provided by the extractor are incomplete (i.e. whether the extractor
# provides only video-only or audio-only formats) so that format
# selection works properly for extractors with such incomplete formats
# (see https://github.com/ytdl-org/youtube-dl/pull/5556).
# Since formats may be filtered during format selection and may no longer
# match the original formats, the results could be incorrect. Thus the
# original formats (or pre-calculated metrics) should be passed to the
# format selection routines as well.
# We therefore pass a context object containing all the necessary
# additional data instead of just the formats.
# This fixes an incorrect format selection issue (see
# https://github.com/ytdl-org/youtube-dl/issues/10083).
1750 incomplete_formats = (
1751 # All formats are video-only or
1752 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1753 # all formats are audio-only
1754 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1755
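# Example: a SoundCloud-style extractor returning only audio-only formats
# (vcodec == 'none' for all of them) makes incomplete_formats True, which
# lets a plain 'best'/'worst' selector fall back to the best/worst
# audio-only format instead of failing (see selector_function above).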
1756 ctx = {
1757 'formats': formats,
1758 'incomplete_formats': incomplete_formats,
1759 }
1760
1761 formats_to_download = list(format_selector(ctx))
1762 if not formats_to_download:
1763 raise ExtractorError('requested format not available',
1764 expected=True)
1765
1766 if download:
1767 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1768 if len(formats_to_download) > 1:
1769 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1770 for format in formats_to_download:
1771 new_info = dict(info_dict)
1772 new_info.update(format)
1773 self.process_info(new_info)
1774 # We update the info dict with the best quality format (backwards compatibility)
1775 info_dict.update(formats_to_download[-1])
1776 return info_dict
1777
1778 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1779 """Select the requested subtitles and their format"""
1780 available_subs = {}
1781 if normal_subtitles and self.params.get('writesubtitles'):
1782 available_subs.update(normal_subtitles)
1783 if automatic_captions and self.params.get('writeautomaticsub'):
1784 for lang, cap_info in automatic_captions.items():
1785 if lang not in available_subs:
1786 available_subs[lang] = cap_info
1787
if (not self.params.get('writesubtitles')
        and not self.params.get('writeautomaticsub')
        or not available_subs):
1791 return None
1792
1793 if self.params.get('allsubtitles', False):
1794 requested_langs = available_subs.keys()
1795 else:
1796 if self.params.get('subtitleslangs', False):
1797 requested_langs = self.params.get('subtitleslangs')
1798 elif 'en' in available_subs:
1799 requested_langs = ['en']
1800 else:
1801 requested_langs = [list(available_subs.keys())[0]]
1802
1803 formats_query = self.params.get('subtitlesformat', 'best')
1804 formats_preference = formats_query.split('/') if formats_query else []
1805 subs = {}
1806 for lang in requested_langs:
1807 formats = available_subs.get(lang)
1808 if formats is None:
1809 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1810 continue
1811 for ext in formats_preference:
1812 if ext == 'best':
1813 f = formats[-1]
1814 break
1815 matches = list(filter(lambda f: f['ext'] == ext, formats))
1816 if matches:
1817 f = matches[-1]
1818 break
1819 else:
1820 f = formats[-1]
1821 self.report_warning(
1822 'No subtitle format found matching "%s" for language %s, '
1823 'using %s' % (formats_query, lang, f['ext']))
1824 subs[lang] = f
1825 return subs
1826
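# Minimal usage sketch (assumes writesubtitles is enabled in params):
#   >>> subs = ydl.process_subtitles(
#   ...     'xyz', {'en': [{'ext': 'vtt'}, {'ext': 'srt'}]}, None)
# With subtitlesformat 'srt/best' this picks the srt track; with the
# default 'best' it picks the last listed format for each language.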
1827 def __forced_printings(self, info_dict, filename, incomplete):
1828 def print_mandatory(field):
1829 if (self.params.get('force%s' % field, False)
1830 and (not incomplete or info_dict.get(field) is not None)):
1831 self.to_stdout(info_dict[field])
1832
1833 def print_optional(field):
1834 if (self.params.get('force%s' % field, False)
1835 and info_dict.get(field) is not None):
1836 self.to_stdout(info_dict[field])
1837
1838 print_mandatory('title')
1839 print_mandatory('id')
1840 if self.params.get('forceurl', False) and not incomplete:
1841 if info_dict.get('requested_formats') is not None:
1842 for f in info_dict['requested_formats']:
1843 self.to_stdout(f['url'] + f.get('play_path', ''))
1844 else:
1845 # For RTMP URLs, also include the playpath
1846 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1847 print_optional('thumbnail')
1848 print_optional('description')
1849 if self.params.get('forcefilename', False) and filename is not None:
1850 self.to_stdout(filename)
1851 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1852 self.to_stdout(formatSeconds(info_dict['duration']))
1853 print_mandatory('format')
1854 if self.params.get('forcejson', False):
1855 self.to_stdout(json.dumps(info_dict))
1856
1857 def process_info(self, info_dict):
1858 """Process a single resolved IE result."""
1859
1860 assert info_dict.get('_type', 'video') == 'video'
1861
1862 max_downloads = self.params.get('max_downloads')
1863 if max_downloads is not None:
1864 if self._num_downloads >= int(max_downloads):
1865 raise MaxDownloadsReached()
1866
1867 # TODO: backward compatibility, to be removed
1868 info_dict['fulltitle'] = info_dict['title']
1869
1870 if 'format' not in info_dict:
1871 info_dict['format'] = info_dict['ext']
1872
1873 reason = self._match_entry(info_dict, incomplete=False)
1874 if reason is not None:
1875 self.to_screen('[download] ' + reason)
1876 return
1877
1878 self._num_downloads += 1
1879
1880 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1881
1882 # Forced printings
1883 self.__forced_printings(info_dict, filename, incomplete=False)
1884
1885 if self.params.get('simulate', False):
1886 if self.params.get('force_write_download_archive', False):
1887 self.record_download_archive(info_dict)
1888
1889 # Do nothing else if in simulate mode
1890 return
1891
1892 if filename is None:
1893 return
1894
1895 def ensure_dir_exists(path):
1896 try:
1897 dn = os.path.dirname(path)
1898 if dn and not os.path.exists(dn):
1899 os.makedirs(dn)
1900 return True
1901 except (OSError, IOError) as err:
1902 self.report_error('unable to create directory ' + error_to_compat_str(err))
1903 return False
1904
1905 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1906 return
1907
1908 if self.params.get('writedescription', False):
1909 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1910 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1911 self.to_screen('[info] Video description is already present')
1912 elif info_dict.get('description') is None:
1913 self.report_warning('There\'s no description to write.')
1914 else:
1915 try:
1916 self.to_screen('[info] Writing video description to: ' + descfn)
1917 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1918 descfile.write(info_dict['description'])
1919 except (OSError, IOError):
1920 self.report_error('Cannot write description file ' + descfn)
1921 return
1922
1923 if self.params.get('writeannotations', False):
1924 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1925 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
1926 self.to_screen('[info] Video annotations are already present')
1927 elif not info_dict.get('annotations'):
1928 self.report_warning('There are no annotations to write.')
1929 else:
1930 try:
1931 self.to_screen('[info] Writing video annotations to: ' + annofn)
1932 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1933 annofile.write(info_dict['annotations'])
1934 except (KeyError, TypeError):
1935 self.report_warning('There are no annotations to write.')
1936 except (OSError, IOError):
1937 self.report_error('Cannot write annotations file: ' + annofn)
1938 return
1939
1940 def dl(name, info, subtitle=False):
1941 fd = get_suitable_downloader(info, self.params)(self, self.params)
1942 for ph in self._progress_hooks:
1943 fd.add_progress_hook(ph)
1944 if self.params.get('verbose'):
1945 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1946 return fd.download(name, info, subtitle)
1947
1948 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1949 self.params.get('writeautomaticsub')])
1950
1951 if subtitles_are_requested and info_dict.get('requested_subtitles'):
# Subtitle download errors are already handled in the relevant IE,
# so processing silently continues for IEs without subtitle support
1954 subtitles = info_dict['requested_subtitles']
1955 # ie = self.get_info_extractor(info_dict['extractor_key'])
1956 for sub_lang, sub_info in subtitles.items():
1957 sub_format = sub_info['ext']
1958 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1959 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
1960 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1961 else:
1962 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1963 if sub_info.get('data') is not None:
1964 try:
1965 # Use newline='' to prevent conversion of newline characters
1966 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1967 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1968 subfile.write(sub_info['data'])
1969 except (OSError, IOError):
1970 self.report_error('Cannot write subtitles file ' + sub_filename)
1971 return
1972 else:
1973 try:
1974 dl(sub_filename, sub_info, subtitle=True)
1975 '''
1976 if self.params.get('sleep_interval_subtitles', False):
1977 dl(sub_filename, sub_info)
1978 else:
1979 sub_data = ie._request_webpage(
1980 sub_info['url'], info_dict['id'], note=False).read()
1981 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1982 subfile.write(sub_data)
1983 '''
1984 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1985 self.report_warning('Unable to download subtitle for "%s": %s' %
1986 (sub_lang, error_to_compat_str(err)))
1987 continue
1988
1989 if self.params.get('skip_download', False):
1990 if self.params.get('convertsubtitles', False):
1991 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1992 filename_real_ext = os.path.splitext(filename)[1][1:]
1993 filename_wo_ext = (
1994 os.path.splitext(filename)[0]
1995 if filename_real_ext == info_dict['ext']
1996 else filename)
1997 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1998 if subconv.available:
1999 info_dict.setdefault('__postprocessors', [])
2000 # info_dict['__postprocessors'].append(subconv)
2001 if os.path.exists(encodeFilename(afilename)):
2002 self.to_screen(
2003 '[download] %s has already been downloaded and '
2004 'converted' % afilename)
2005 else:
2006 try:
2007 self.post_process(filename, info_dict)
2008 except (PostProcessingError) as err:
2009 self.report_error('postprocessing: %s' % str(err))
2010 return
2011
2012 if self.params.get('writeinfojson', False):
2013 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
2014 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2015 self.to_screen('[info] Video description metadata is already present')
2016 else:
2017 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
2018 try:
2019 write_json_file(self.filter_requested_info(info_dict), infofn)
2020 except (OSError, IOError):
2021 self.report_error('Cannot write metadata to JSON file ' + infofn)
2022 return
2023
2024 self._write_thumbnails(info_dict, filename)
2025
2026 # Write internet shortcut files
2027 url_link = webloc_link = desktop_link = False
2028 if self.params.get('writelink', False):
2029 if sys.platform == "darwin": # macOS.
2030 webloc_link = True
2031 elif sys.platform.startswith("linux"):
2032 desktop_link = True
2033 else: # if sys.platform in ['win32', 'cygwin']:
2034 url_link = True
2035 if self.params.get('writeurllink', False):
2036 url_link = True
2037 if self.params.get('writewebloclink', False):
2038 webloc_link = True
2039 if self.params.get('writedesktoplink', False):
2040 desktop_link = True
2041
2042 if url_link or webloc_link or desktop_link:
2043 if 'webpage_url' not in info_dict:
2044 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2045 return
2046 ascii_url = iri_to_uri(info_dict['webpage_url'])
2047
2048 def _write_link_file(extension, template, newline, embed_filename):
2049 linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2050 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2051 self.to_screen('[info] Internet shortcut is already present')
2052 else:
2053 try:
2054 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2055 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2056 template_vars = {'url': ascii_url}
2057 if embed_filename:
2058 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2059 linkfile.write(template % template_vars)
2060 except (OSError, IOError):
2061 self.report_error('Cannot write internet shortcut ' + linkfn)
2062 return False
2063 return True
2064
2065 if url_link:
2066 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2067 return
2068 if webloc_link:
2069 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2070 return
2071 if desktop_link:
2072 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2073 return
2074
2075 # Download
2076 must_record_download_archive = False
2077 if not self.params.get('skip_download', False):
2078 try:
2079 if info_dict.get('requested_formats') is not None:
2080 downloaded = []
2081 success = True
2082 merger = FFmpegMergerPP(self)
2083 if not merger.available:
2084 postprocessors = []
2085 self.report_warning('You have requested multiple '
2086 'formats but ffmpeg or avconv are not installed.'
2087 ' The formats won\'t be merged.')
2088 else:
2089 postprocessors = [merger]
2090
2091 def compatible_formats(formats):
2092 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2093 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2094 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2095 if len(video_formats) > 2 or len(audio_formats) > 2:
2096 return False
2097
2098 # Check extension
2099 exts = set(format.get('ext') for format in formats)
2100 COMPATIBLE_EXTS = (
2101 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2102 set(('webm',)),
2103 )
2104 for ext_sets in COMPATIBLE_EXTS:
2105 if ext_sets.issuperset(exts):
2106 return True
2107 # TODO: Check acodec/vcodec
2108 return False
2109
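# Example: requested formats with exts {'mp4', 'm4a'} fall inside the
# first compatible set, so the prepared extension is kept; {'mp4', 'webm'}
# matches neither set, so the merge target below is forced to mkv.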
2110 filename_real_ext = os.path.splitext(filename)[1][1:]
2111 filename_wo_ext = (
2112 os.path.splitext(filename)[0]
2113 if filename_real_ext == info_dict['ext']
2114 else filename)
2115 requested_formats = info_dict['requested_formats']
2116 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2117 info_dict['ext'] = 'mkv'
2118 self.report_warning(
2119 'Requested formats are incompatible for merge and will be merged into mkv.')
2120 # Ensure filename always has a correct extension for successful merge
2121 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2122 file_exists = os.path.exists(encodeFilename(filename))
2123 if not self.params.get('overwrites', False) and file_exists:
2124 self.to_screen(
2125 '[download] %s has already been downloaded and '
2126 'merged' % filename)
2127 else:
2128 if file_exists:
2129 self.report_file_delete(filename)
2130 os.remove(encodeFilename(filename))
2131 for f in requested_formats:
2132 new_info = dict(info_dict)
2133 new_info.update(f)
2134 fname = prepend_extension(
2135 self.prepare_filename(new_info),
2136 'f%s' % f['format_id'], new_info['ext'])
2137 if not ensure_dir_exists(fname):
2138 return
2139 downloaded.append(fname)
2140 partial_success, real_download = dl(fname, new_info)
2141 success = success and partial_success
2142 info_dict['__postprocessors'] = postprocessors
2143 info_dict['__files_to_merge'] = downloaded
# Even if the component files were already downloaded, the merge itself only happens now
2145 info_dict['__real_download'] = True
2146 else:
2147 # Delete existing file with --yes-overwrites
2148 if self.params.get('overwrites', False):
2149 if os.path.exists(encodeFilename(filename)):
2150 self.report_file_delete(filename)
2151 os.remove(encodeFilename(filename))
2152 # Just a single file
2153 success, real_download = dl(filename, info_dict)
2154 info_dict['__real_download'] = real_download
2155 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2156 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2157 return
2158 except (OSError, IOError) as err:
2159 raise UnavailableVideoError(err)
2160 except (ContentTooShortError, ) as err:
2161 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2162 return
2163
2164 if success and filename != '-':
2165 # Fixup content
2166 fixup_policy = self.params.get('fixup')
2167 if fixup_policy is None:
2168 fixup_policy = 'detect_or_warn'
2169
2170 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2171
2172 stretched_ratio = info_dict.get('stretched_ratio')
2173 if stretched_ratio is not None and stretched_ratio != 1:
2174 if fixup_policy == 'warn':
2175 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2176 info_dict['id'], stretched_ratio))
2177 elif fixup_policy == 'detect_or_warn':
2178 stretched_pp = FFmpegFixupStretchedPP(self)
2179 if stretched_pp.available:
2180 info_dict.setdefault('__postprocessors', [])
2181 info_dict['__postprocessors'].append(stretched_pp)
2182 else:
2183 self.report_warning(
2184 '%s: Non-uniform pixel ratio (%s). %s'
2185 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2186 else:
2187 assert fixup_policy in ('ignore', 'never')
2188
2189 if (info_dict.get('requested_formats') is None
2190 and info_dict.get('container') == 'm4a_dash'):
2191 if fixup_policy == 'warn':
2192 self.report_warning(
2193 '%s: writing DASH m4a. '
2194 'Only some players support this container.'
2195 % info_dict['id'])
2196 elif fixup_policy == 'detect_or_warn':
2197 fixup_pp = FFmpegFixupM4aPP(self)
2198 if fixup_pp.available:
2199 info_dict.setdefault('__postprocessors', [])
2200 info_dict['__postprocessors'].append(fixup_pp)
2201 else:
2202 self.report_warning(
2203 '%s: writing DASH m4a. '
2204 'Only some players support this container. %s'
2205 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2206 else:
2207 assert fixup_policy in ('ignore', 'never')
2208
2209 if (info_dict.get('protocol') == 'm3u8_native'
2210 or info_dict.get('protocol') == 'm3u8'
2211 and self.params.get('hls_prefer_native')):
2212 if fixup_policy == 'warn':
2213 self.report_warning('%s: malformed AAC bitstream detected.' % (
2214 info_dict['id']))
2215 elif fixup_policy == 'detect_or_warn':
2216 fixup_pp = FFmpegFixupM3u8PP(self)
2217 if fixup_pp.available:
2218 info_dict.setdefault('__postprocessors', [])
2219 info_dict['__postprocessors'].append(fixup_pp)
2220 else:
2221 self.report_warning(
2222 '%s: malformed AAC bitstream detected. %s'
2223 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2224 else:
2225 assert fixup_policy in ('ignore', 'never')
2226
2227 try:
2228 self.post_process(filename, info_dict)
2229 except (PostProcessingError) as err:
2230 self.report_error('postprocessing: %s' % str(err))
2231 return
2232 try:
2233 for ph in self._post_hooks:
2234 ph(filename)
2235 except Exception as err:
2236 self.report_error('post hooks: %s' % str(err))
2237 return
2238 must_record_download_archive = True
2239
2240 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2241 self.record_download_archive(info_dict)
2242 max_downloads = self.params.get('max_downloads')
2243 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2244 raise MaxDownloadsReached()
2245
2246 def download(self, url_list):
2247 """Download a given list of URLs."""
2248 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2249 if (len(url_list) > 1
2250 and outtmpl != '-'
2251 and '%' not in outtmpl
2252 and self.params.get('max_downloads') != 1):
2253 raise SameFileError(outtmpl)
2254
2255 for url in url_list:
2256 try:
2257 # It also downloads the videos
2258 res = self.extract_info(
2259 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2260 except UnavailableVideoError:
2261 self.report_error('unable to download video')
2262 except MaxDownloadsReached:
2263 self.to_screen('[info] Maximum number of downloaded files reached.')
2264 raise
2265 else:
2266 if self.params.get('dump_single_json', False):
2267 self.to_stdout(json.dumps(res))
2268
2269 return self._download_retcode
2270
2271 def download_with_info_file(self, info_filename):
2272 with contextlib.closing(fileinput.FileInput(
2273 [info_filename], mode='r',
2274 openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, so we can't call json.load
2276 info = self.filter_requested_info(json.loads('\n'.join(f)))
2277 try:
2278 self.process_ie_result(info, download=True)
2279 except DownloadError:
2280 webpage_url = info.get('webpage_url')
2281 if webpage_url is not None:
self.report_warning('The info failed to download; retrying with "%s"' % webpage_url)
2283 return self.download([webpage_url])
2284 else:
2285 raise
2286 return self._download_retcode
2287
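# Minimal usage sketch: after a prior run with --write-info-json,
#   >>> ydl.download_with_info_file('My Video.info.json')
# re-runs processing from the saved metadata, falling back to the stored
# webpage_url if the cached format URLs no longer download.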
2288 @staticmethod
2289 def filter_requested_info(info_dict):
2290 return dict(
2291 (k, v) for k, v in info_dict.items()
2292 if k not in ['requested_formats', 'requested_subtitles'])
2293
2294 def post_process(self, filename, ie_info):
2295 """Run all the postprocessors on the given file."""
2296 info = dict(ie_info)
2297 info['filepath'] = filename
2298 pps_chain = []
2299 if ie_info.get('__postprocessors') is not None:
2300 pps_chain.extend(ie_info['__postprocessors'])
2301 pps_chain.extend(self._pps)
2302 for pp in pps_chain:
2303 files_to_delete = []
2304 try:
2305 files_to_delete, info = pp.run(info)
2306 except PostProcessingError as e:
2307 self.report_error(e.msg)
2308 if files_to_delete and not self.params.get('keepvideo', False):
2309 for old_filename in set(files_to_delete):
2310 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2311 try:
2312 os.remove(encodeFilename(old_filename))
2313 except (IOError, OSError):
2314 self.report_warning('Unable to remove downloaded original file')
2315
2316 def _make_archive_id(self, info_dict):
2317 video_id = info_dict.get('id')
2318 if not video_id:
2319 return
# Future-proof against any change in case, and keep backwards
# compatibility with prior versions
2322 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2323 if extractor is None:
2324 url = str_or_none(info_dict.get('url'))
2325 if not url:
2326 return
2327 # Try to find matching extractor for the URL and take its ie_key
2328 for ie in self._ies:
2329 if ie.suitable(url):
2330 extractor = ie.ie_key()
2331 break
2332 else:
2333 return
2334 return extractor.lower() + ' ' + video_id
2335
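# Example: an entry extracted by the Youtube IE with id 'dQw4w9WgXcQ'
# produces the archive id 'youtube dQw4w9WgXcQ' (the extractor key is
# lower-cased to stay stable across case changes).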
2336 def in_download_archive(self, info_dict):
2337 fn = self.params.get('download_archive')
2338 if fn is None:
2339 return False
2340
2341 vid_id = self._make_archive_id(info_dict)
2342 if not vid_id:
2343 return False # Incomplete video information
2344
2345 return vid_id in self.archive
2346
2347 def record_download_archive(self, info_dict):
2348 fn = self.params.get('download_archive')
2349 if fn is None:
2350 return
2351 vid_id = self._make_archive_id(info_dict)
2352 assert vid_id
2353 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2354 archive_file.write(vid_id + '\n')
2355 self.archive.add(vid_id)
2356
2357 @staticmethod
2358 def format_resolution(format, default='unknown'):
2359 if format.get('vcodec') == 'none':
2360 return 'audio only'
2361 if format.get('resolution') is not None:
2362 return format['resolution']
2363 if format.get('height') is not None:
2364 if format.get('width') is not None:
2365 res = '%sx%s' % (format['width'], format['height'])
2366 else:
2367 res = '%sp' % format['height']
2368 elif format.get('width') is not None:
2369 res = '%dx?' % format['width']
2370 else:
2371 res = default
2372 return res
2373
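# Examples: {'vcodec': 'none'} -> 'audio only';
# {'width': 1920, 'height': 1080} -> '1920x1080';
# {'height': 720} -> '720p'; {'width': 640} -> '640x?'.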
2374 def _format_note(self, fdict):
2375 res = ''
2376 if fdict.get('ext') in ['f4f', 'f4m']:
2377 res += '(unsupported) '
2378 if fdict.get('language'):
2379 if res:
2380 res += ' '
2381 res += '[%s] ' % fdict['language']
2382 if fdict.get('format_note') is not None:
2383 res += fdict['format_note'] + ' '
2384 if fdict.get('tbr') is not None:
2385 res += '%4dk ' % fdict['tbr']
2386 if fdict.get('container') is not None:
2387 if res:
2388 res += ', '
2389 res += '%s container' % fdict['container']
2390 if (fdict.get('vcodec') is not None
2391 and fdict.get('vcodec') != 'none'):
2392 if res:
2393 res += ', '
2394 res += fdict['vcodec']
2395 if fdict.get('vbr') is not None:
2396 res += '@'
2397 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2398 res += 'video@'
2399 if fdict.get('vbr') is not None:
2400 res += '%4dk' % fdict['vbr']
2401 if fdict.get('fps') is not None:
2402 if res:
2403 res += ', '
2404 res += '%sfps' % fdict['fps']
2405 if fdict.get('acodec') is not None:
2406 if res:
2407 res += ', '
2408 if fdict['acodec'] == 'none':
2409 res += 'video only'
2410 else:
2411 res += '%-5s' % fdict['acodec']
2412 elif fdict.get('abr') is not None:
2413 if res:
2414 res += ', '
2415 res += 'audio'
2416 if fdict.get('abr') is not None:
2417 res += '@%3dk' % fdict['abr']
2418 if fdict.get('asr') is not None:
2419 res += ' (%5dHz)' % fdict['asr']
2420 if fdict.get('filesize') is not None:
2421 if res:
2422 res += ', '
2423 res += format_bytes(fdict['filesize'])
2424 elif fdict.get('filesize_approx') is not None:
2425 if res:
2426 res += ', '
2427 res += '~' + format_bytes(fdict['filesize_approx'])
2428 return res
2429
2430 def _format_note_table(self, f):
2431 def join_fields(*vargs):
2432 return ', '.join((val for val in vargs if val != ''))
2433
2434 return join_fields(
2435 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2436 format_field(f, 'language', '[%s]'),
2437 format_field(f, 'format_note'),
2438 format_field(f, 'container', ignore=(None, f.get('ext'))),
2439 format_field(f, 'asr', '%5dHz'))
2440
2441 def list_formats(self, info_dict):
2442 formats = info_dict.get('formats', [info_dict])
2443 new_format = self.params.get('listformats_table', False)
2444 if new_format:
2445 table = [
2446 [
2447 format_field(f, 'format_id'),
2448 format_field(f, 'ext'),
2449 self.format_resolution(f),
2450 format_field(f, 'fps', '%d'),
2451 '|',
2452 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2453 format_field(f, 'tbr', '%4dk'),
2454 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2455 '|',
2456 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2457 format_field(f, 'vbr', '%4dk'),
2458 format_field(f, 'acodec', default='unknown').replace('none', ''),
2459 format_field(f, 'abr', '%3dk'),
2460 format_field(f, 'asr', '%5dHz'),
2461 self._format_note_table(f)]
2462 for f in formats
2463 if f.get('preference') is None or f['preference'] >= -1000]
2464 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2465 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2466 else:
2467 table = [
2468 [
2469 format_field(f, 'format_id'),
2470 format_field(f, 'ext'),
2471 self.format_resolution(f),
2472 self._format_note(f)]
2473 for f in formats
2474 if f.get('preference') is None or f['preference'] >= -1000]
2475 header_line = ['format code', 'extension', 'resolution', 'note']
2476
2477 # if len(formats) > 1:
2478 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2479 self.to_screen(
2480 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2481 header_line,
2482 table,
2483 delim=new_format,
2484 extraGap=(0 if new_format else 1),
2485 hideEmpty=new_format)))
2486
2487 def list_thumbnails(self, info_dict):
2488 thumbnails = info_dict.get('thumbnails')
2489 if not thumbnails:
2490 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2491 return
2492
2493 self.to_screen(
2494 '[info] Thumbnails for %s:' % info_dict['id'])
2495 self.to_screen(render_table(
2496 ['ID', 'width', 'height', 'URL'],
2497 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2498
2499 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2500 if not subtitles:
2501 self.to_screen('%s has no %s' % (video_id, name))
2502 return
2503 self.to_screen(
2504 'Available %s for %s:' % (name, video_id))
2505 self.to_screen(render_table(
2506 ['Language', 'formats'],
2507 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2508 for lang, formats in subtitles.items()]))
2509
2510 def urlopen(self, req):
2511 """ Start an HTTP download """
2512 if isinstance(req, compat_basestring):
2513 req = sanitized_Request(req)
2514 return self._opener.open(req, timeout=self._socket_timeout)
2515
2516 def print_debug_header(self):
2517 if not self.params.get('verbose'):
2518 return
2519
2520 if type('') is not compat_str:
2521 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2522 self.report_warning(
2523 'Your Python is broken! Update to a newer and supported version')
2524
2525 stdout_encoding = getattr(
2526 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2527 encoding_str = (
2528 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2529 locale.getpreferredencoding(),
2530 sys.getfilesystemencoding(),
2531 stdout_encoding,
2532 self.get_encoding()))
2533 write_string(encoding_str, encoding=None)
2534
2535 self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
2536 if _LAZY_LOADER:
2537 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2538 try:
2539 sp = subprocess.Popen(
2540 ['git', 'rev-parse', '--short', 'HEAD'],
2541 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2542 cwd=os.path.dirname(os.path.abspath(__file__)))
2543 out, err = process_communicate_or_kill(sp)
2544 out = out.decode().strip()
2545 if re.match('[0-9a-f]+', out):
2546 self._write_string('[debug] Git HEAD: ' + out + '\n')
2547 except Exception:
2548 try:
2549 sys.exc_clear()
2550 except Exception:
2551 pass
2552
2553 def python_implementation():
2554 impl_name = platform.python_implementation()
2555 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2556 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2557 return impl_name
2558
2559 self._write_string('[debug] Python version %s (%s) - %s\n' % (
2560 platform.python_version(), python_implementation(),
2561 platform_name()))
2562
2563 exe_versions = FFmpegPostProcessor.get_versions(self)
2564 exe_versions['rtmpdump'] = rtmpdump_version()
2565 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2566 exe_str = ', '.join(
2567 '%s %s' % (exe, v)
2568 for exe, v in sorted(exe_versions.items())
2569 if v
2570 )
2571 if not exe_str:
2572 exe_str = 'none'
2573 self._write_string('[debug] exe versions: %s\n' % exe_str)
2574
2575 proxy_map = {}
2576 for handler in self._opener.handlers:
2577 if hasattr(handler, 'proxies'):
2578 proxy_map.update(handler.proxies)
2579 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2580
2581 if self.params.get('call_home', False):
2582 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2583 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2584 return
2585 latest_version = self.urlopen(
2586 'https://yt-dl.org/latest/version').read().decode('utf-8')
2587 if version_tuple(latest_version) > version_tuple(__version__):
2588 self.report_warning(
2589 'You are using an outdated version (newest version: %s)! '
2590 'See https://yt-dl.org/update if you need help updating.' %
2591 latest_version)
2592
2593 def _setup_opener(self):
2594 timeout_val = self.params.get('socket_timeout')
2595 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2596
2597 opts_cookiefile = self.params.get('cookiefile')
2598 opts_proxy = self.params.get('proxy')
2599
2600 if opts_cookiefile is None:
2601 self.cookiejar = compat_cookiejar.CookieJar()
2602 else:
2603 opts_cookiefile = expand_path(opts_cookiefile)
2604 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2605 if os.access(opts_cookiefile, os.R_OK):
2606 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2607
2608 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2609 if opts_proxy is not None:
2610 if opts_proxy == '':
2611 proxies = {}
2612 else:
2613 proxies = {'http': opts_proxy, 'https': opts_proxy}
2614 else:
2615 proxies = compat_urllib_request.getproxies()
2616 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2617 if 'http' in proxies and 'https' not in proxies:
2618 proxies['https'] = proxies['http']
2619 proxy_handler = PerRequestProxyHandler(proxies)
2620
2621 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2622 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2623 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2624 redirect_handler = YoutubeDLRedirectHandler()
2625 data_handler = compat_urllib_request_DataHandler()
2626
# When we pass our own FileHandler instance, build_opener won't add the
# default FileHandler, which allows us to disable the file protocol; the
# file protocol can be abused for malicious purposes (see
# https://github.com/ytdl-org/youtube-dl/issues/8227)
2631 file_handler = compat_urllib_request.FileHandler()
2632
2633 def file_open(*args, **kwargs):
2634 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2635 file_handler.file_open = file_open
2636
2637 opener = compat_urllib_request.build_opener(
2638 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2639
2640 # Delete the default user-agent header, which would otherwise apply in
2641 # cases where our custom HTTP handler doesn't come into play
2642 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2643 opener.addheaders = []
2644 self._opener = opener
2645
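# Effect sketch: with this opener, file:// URLs are rejected outright,
#   >>> ydl.urlopen('file:///etc/passwd')   # raises URLError
# while http(s) requests go through PerRequestProxyHandler, which honours
# a per-request 'Ydl-request-proxy' header as well as the global proxy.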
2646 def encode(self, s):
2647 if isinstance(s, bytes):
2648 return s # Already encoded
2649
2650 try:
2651 return s.encode(self.get_encoding())
2652 except UnicodeEncodeError as err:
2653 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2654 raise
2655
2656 def get_encoding(self):
2657 encoding = self.params.get('encoding')
2658 if encoding is None:
2659 encoding = preferredencoding()
2660 return encoding
2661
2662 def _write_thumbnails(self, info_dict, filename):
2663 if self.params.get('writethumbnail', False):
2664 thumbnails = info_dict.get('thumbnails')
2665 if thumbnails:
2666 thumbnails = [thumbnails[-1]]
2667 elif self.params.get('write_all_thumbnails', False):
2668 thumbnails = info_dict.get('thumbnails')
2669 else:
2670 return
2671
2672 if not thumbnails:
2673 # No thumbnails present, so return immediately
2674 return
2675
2676 for t in thumbnails:
2677 thumb_ext = determine_ext(t['url'], 'jpg')
2678 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2679 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2680 t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2681
2682 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2683 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2684 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2685 else:
2686 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2687 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2688 try:
2689 uf = self.urlopen(t['url'])
2690 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2691 shutil.copyfileobj(uf, thumbf)
2692 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2693 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2694 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2695 self.report_warning('Unable to download thumbnail "%s": %s' %
2696 (t['url'], error_to_compat_str(err)))