]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
697b089a14a02f049df98a10f0758d69c6fc5d3d
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_shlex_quote,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .cookies import load_cookies
46 from .utils import (
47 age_restricted,
48 args_to_str,
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
52 DEFAULT_OUTTMPL,
53 determine_ext,
54 determine_protocol,
55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
58 DownloadError,
59 encode_compat_str,
60 encodeFilename,
61 EntryNotInPlaylist,
62 error_to_compat_str,
63 ExistingVideoReached,
64 expand_path,
65 ExtractorError,
66 float_or_none,
67 format_bytes,
68 format_field,
69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
71 formatSeconds,
72 GeoRestrictedError,
73 HEADRequest,
74 int_or_none,
75 iri_to_uri,
76 ISO3166Utils,
77 LazyList,
78 locked_file,
79 make_dir,
80 make_HTTPS_handler,
81 MaxDownloadsReached,
82 network_exceptions,
83 orderedSet,
84 OUTTMPL_TYPES,
85 PagedList,
86 parse_filesize,
87 PerRequestProxyHandler,
88 platform_name,
89 PostProcessingError,
90 preferredencoding,
91 prepend_extension,
92 process_communicate_or_kill,
93 register_socks_protocols,
94 RejectedVideoReached,
95 render_table,
96 replace_extension,
97 SameFileError,
98 sanitize_filename,
99 sanitize_path,
100 sanitize_url,
101 sanitized_Request,
102 std_headers,
103 str_or_none,
104 strftime_or_none,
105 subtitles_filename,
106 ThrottledDownload,
107 to_high_limit_path,
108 traverse_obj,
109 try_get,
110 UnavailableVideoError,
111 url_basename,
112 variadic,
113 version_tuple,
114 write_json_file,
115 write_string,
116 YoutubeDLCookieProcessor,
117 YoutubeDLHandler,
118 YoutubeDLRedirectHandler,
119 )
120 from .cache import Cache
121 from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126 )
127 from .extractor.openload import PhantomJSwrapper
128 from .downloader import (
129 get_suitable_downloader,
130 shorten_protocol_name
131 )
132 from .downloader.rtmp import rtmpdump_version
133 from .postprocessor import (
134 get_postprocessor,
135 FFmpegFixupDurationPP,
136 FFmpegFixupM3u8PP,
137 FFmpegFixupM4aPP,
138 FFmpegFixupStretchedPP,
139 FFmpegFixupTimestampPP,
140 FFmpegMergerPP,
141 FFmpegPostProcessor,
142 MoveFilesAfterDownloadPP,
143 )
144 from .version import __version__
145
146 if compat_os_name == 'nt':
147 import ctypes
148
149
150 class YoutubeDL(object):
151 """YoutubeDL class.
152
153 YoutubeDL objects are the ones responsible of downloading the
154 actual video file and writing it to disk if the user has requested
155 it, among some other tasks. In most cases there should be one per
156 program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
158 has to pass the URL to one of them.
159
160 For this, YoutubeDL objects have a method that allows
161 InfoExtractors to be registered in a given order. When it is passed
162 a URL, the YoutubeDL object handles it to the first InfoExtractor it
163 finds that reports being able to handle it. The InfoExtractor extracts
164 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
166 Downloader to download the video.
167
168 YoutubeDL objects accept a lot of parameters. In order not to saturate
169 the object constructor with arguments, it receives a dictionary of
170 options instead. These options are available through the params
171 attribute for the InfoExtractors to use. The YoutubeDL also
172 registers itself as the downloader in charge for the InfoExtractors
173 that are added to it, so this is a "mutual registration".
174
175 Available options:
176
177 username: Username for authentication purposes.
178 password: Password for authentication purposes.
179 videopassword: Password for accessing a video.
180 ap_mso: Adobe Pass multiple-system operator identifier.
181 ap_username: Multiple-system operator account username.
182 ap_password: Multiple-system operator account password.
183 usenetrc: Use netrc for authentication instead.
184 verbose: Print additional info to stdout.
185 quiet: Do not print messages to stdout.
186 no_warnings: Do not print out anything for warnings.
187 forceprint: A list of templates to force print
188 forceurl: Force printing final URL. (Deprecated)
189 forcetitle: Force printing title. (Deprecated)
190 forceid: Force printing ID. (Deprecated)
191 forcethumbnail: Force printing thumbnail URL. (Deprecated)
192 forcedescription: Force printing description. (Deprecated)
193 forcefilename: Force printing final filename. (Deprecated)
194 forceduration: Force printing duration. (Deprecated)
195 forcejson: Force printing info_dict as JSON.
196 dump_single_json: Force printing the info_dict of the whole playlist
197 (or video) as a single JSON line.
198 force_write_download_archive: Force writing download archive regardless
199 of 'skip_download' or 'simulate'.
200 simulate: Do not download the video files.
201 format: Video format code. see "FORMAT SELECTION" for more details.
202 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
204 extracting metadata even if the video is not actually
205 available for download (experimental)
206 format_sort: How to sort the video formats. see "Sorting Formats"
207 for more details.
208 format_sort_force: Force the given format_sort. see "Sorting Formats"
209 for more details.
210 allow_multiple_video_streams: Allow multiple video streams to be merged
211 into a single file
212 allow_multiple_audio_streams: Allow multiple audio streams to be merged
213 into a single file
214 check_formats Whether to test if the formats are downloadable.
215 Can be True (check all), False (check none)
216 or None (check only if requested by extractor)
217 paths: Dictionary of output paths. The allowed keys are 'home'
218 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
219 outtmpl: Dictionary of templates for output names. Allowed keys
220 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
222 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
223 restrictfilenames: Do not allow "&" and spaces in file names
224 trim_file_name: Limit length of filename (extension excluded)
225 windowsfilenames: Force the filenames to be windows compatible
226 ignoreerrors: Do not stop on download errors
227 (Default True when running yt-dlp,
228 but False when directly accessing YoutubeDL class)
229 skip_playlist_after_errors: Number of allowed failures until the rest of
230 the playlist is skipped
231 force_generic_extractor: Force downloader to use the generic extractor
232 overwrites: Overwrite all video and metadata files if True,
233 overwrite only non-video files if None
234 and don't overwrite any file if False
235 playliststart: Playlist item to start at.
236 playlistend: Playlist item to end at.
237 playlist_items: Specific indices of playlist to download.
238 playlistreverse: Download playlist items in reverse order.
239 playlistrandom: Download playlist items in random order.
240 matchtitle: Download only matching titles.
241 rejecttitle: Reject downloads for matching titles.
242 logger: Log messages to a logging.Logger instance.
243 logtostderr: Log messages to stderr instead of stdout.
244 writedescription: Write the video description to a .description file
245 writeinfojson: Write the video description to a .info.json file
246 clean_infojson: Remove private fields from the infojson
247 writecomments: Extract video comments. This will not be written to disk
248 unless writeinfojson is also given
249 writeannotations: Write the video annotations to a .annotations.xml file
250 writethumbnail: Write the thumbnail image to a file
251 allow_playlist_files: Whether to write playlists' description, infojson etc
252 also to disk when using the 'write*' options
253 write_all_thumbnails: Write all thumbnail formats to files
254 writelink: Write an internet shortcut file, depending on the
255 current platform (.url/.webloc/.desktop)
256 writeurllink: Write a Windows internet shortcut file (.url)
257 writewebloclink: Write a macOS internet shortcut file (.webloc)
258 writedesktoplink: Write a Linux internet shortcut file (.desktop)
259 writesubtitles: Write the video subtitles to a file
260 writeautomaticsub: Write the automatically generated subtitles to a file
261 allsubtitles: Deprecated - Use subtitleslangs = ['all']
262 Downloads all the subtitles of the video
263 (requires writesubtitles or writeautomaticsub)
264 listsubtitles: Lists all available subtitles for the video
265 subtitlesformat: The format code for subtitles
266 subtitleslangs: List of languages of the subtitles to download (can be regex).
267 The list may contain "all" to refer to all the available
268 subtitles. The language can be prefixed with a "-" to
269 exclude it from the requested languages. Eg: ['all', '-live_chat']
270 keepvideo: Keep the video file after post-processing
271 daterange: A DateRange object, download only if the upload_date is in the range.
272 skip_download: Skip the actual download of the video file
273 cachedir: Location of the cache files in the filesystem.
274 False to disable filesystem cache.
275 noplaylist: Download single video instead of a playlist if in doubt.
276 age_limit: An integer representing the user's age in years.
277 Unsuitable videos for the given age are skipped.
278 min_views: An integer representing the minimum view count the video
279 must have in order to not be skipped.
280 Videos without view count information are always
281 downloaded. None for no limit.
282 max_views: An integer representing the maximum view count.
283 Videos that are more popular than that are not
284 downloaded.
285 Videos without view count information are always
286 downloaded. None for no limit.
287 download_archive: File name of a file where all downloads are recorded.
288 Videos already present in the file are not downloaded
289 again.
290 break_on_existing: Stop the download process after attempting to download a
291 file that is in the archive.
292 break_on_reject: Stop the download process when encountering a video that
293 has been filtered out.
294 cookiefile: File name where cookies should be read from and dumped to
295 cookiesfrombrowser: A tuple containing the name of the browser and the profile
296 name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
298 nocheckcertificate:Do not verify SSL certificates
299 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
300 At the moment, this is only supported by YouTube.
301 proxy: URL of the proxy server to use
302 geo_verification_proxy: URL of the proxy to use for IP address verification
303 on geo-restricted sites.
304 socket_timeout: Time to wait for unresponsive hosts, in seconds
305 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
307 debug_printtraffic:Print out sent and received HTTP traffic
308 include_ads: Download ads as well
309 default_search: Prepend this string if an input url is not valid.
310 'auto' for elaborate guessing
311 encoding: Use this encoding instead of the system-specified.
312 extract_flat: Do not resolve URLs, return the immediate result.
313 Pass in 'in_playlist' to only show this behavior for
314 playlist items.
315 postprocessors: A list of dictionaries, each with an entry
316 * key: The name of the postprocessor. See
317 yt_dlp/postprocessor/__init__.py for a list.
318 * when: When to run the postprocessor. Can be one of
319 pre_process|before_dl|post_process|after_move.
320 Assumed to be 'post_process' if not given
321 post_hooks: A list of functions that get called as the final step
322 for each video file, after all postprocessors have been
323 called. The filename will be passed as the only argument.
324 progress_hooks: A list of functions that get called on download
325 progress, with a dictionary with the entries
326 * status: One of "downloading", "error", or "finished".
327 Check this first and ignore unknown values.
328 * info_dict: The extracted info_dict
329
330 If status is one of "downloading", or "finished", the
331 following properties may also be present:
332 * filename: The final filename (always present)
333 * tmpfilename: The filename we're currently writing to
334 * downloaded_bytes: Bytes on disk
335 * total_bytes: Size of the whole file, None if unknown
336 * total_bytes_estimate: Guess of the eventual file size,
337 None if unavailable.
338 * elapsed: The number of seconds since download started.
339 * eta: The estimated time in seconds, None if unknown
340 * speed: The download speed in bytes/second, None if
341 unknown
342 * fragment_index: The counter of the currently
343 downloaded video fragment.
344 * fragment_count: The number of fragments (= individual
345 files that will be merged)
346
347 Progress hooks are guaranteed to be called at least once
348 (with status "finished") if the download is successful.
349 merge_output_format: Extension to use when merging formats.
350 final_ext: Expected final extension; used to detect when the file was
351 already downloaded and converted. "merge_output_format" is
352 replaced by this extension when given
353 fixup: Automatically correct known faults of the file.
354 One of:
355 - "never": do nothing
356 - "warn": only emit a warning
357 - "detect_or_warn": check whether we can do anything
358 about it, warn otherwise (default)
359 source_address: Client-side IP address to bind to.
360 call_home: Boolean, true iff we are allowed to contact the
361 yt-dlp servers for debugging. (BROKEN)
362 sleep_interval_requests: Number of seconds to sleep between requests
363 during extraction
364 sleep_interval: Number of seconds to sleep before each download when
365 used alone or a lower bound of a range for randomized
366 sleep before each download (minimum possible number
367 of seconds to sleep) when used along with
368 max_sleep_interval.
369 max_sleep_interval:Upper bound of a range for randomized sleep before each
370 download (maximum possible number of seconds to sleep).
371 Must only be used along with sleep_interval.
372 Actual sleep time will be a random float from range
373 [sleep_interval; max_sleep_interval].
374 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
375 listformats: Print an overview of available video formats and exit.
376 list_thumbnails: Print a table of all thumbnails and exit.
377 match_filter: A function that gets called with the info_dict of
378 every video.
379 If it returns a message, the video is ignored.
380 If it returns None, the video is downloaded.
381 match_filter_func in utils.py is one example for this.
382 no_color: Do not emit color codes in output.
383 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
384 HTTP header
385 geo_bypass_country:
386 Two-letter ISO 3166-2 country code that will be used for
387 explicit geographic restriction bypassing via faking
388 X-Forwarded-For HTTP header
389 geo_bypass_ip_block:
390 IP range in CIDR notation that will be used similarly to
391 geo_bypass_country
392
393 The following options determine which downloader is picked:
394 external_downloader: A dictionary of protocol keys and the executable of the
395 external downloader to use for it. The allowed protocols
396 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
397 Set the value to 'native' to use the native downloader
398 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
399 or {'m3u8': 'ffmpeg'} instead.
400 Use the native HLS downloader instead of ffmpeg/avconv
401 if True, otherwise use ffmpeg/avconv if False, otherwise
402 use downloader suggested by extractor if None.
403 compat_opts: Compatibility options. See "Differences in default behavior".
404 The following options do not work when used through the API:
405 filename, abort-on-error, multistreams, no-live-chat,
406 no-clean-infojson, no-playlist-metafiles.
407 Refer __init__.py for their implementation
408
409 The following parameters are not used by YoutubeDL itself, they are used by
410 the downloader (see yt_dlp/downloader/common.py):
411 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
412 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
413 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
414
415 The following options are used by the post processors:
416 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
417 otherwise prefer ffmpeg. (avconv support is deprecated)
418 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
419 to the binary or its containing directory.
420 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
421 and a list of additional command-line arguments for the
422 postprocessor/executable. The dict can also have "PP+EXE" keys
423 which are used when the given exe is used by the given PP.
424 Use 'default' as the name for arguments to passed to all PP
425
426 The following options are used by the extractors:
427 extractor_retries: Number of times to retry for known errors
428 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
429 hls_split_discontinuity: Split HLS playlists to different formats at
430 discontinuities such as ad breaks (default: False)
431 extractor_args: A dictionary of arguments to be passed to the extractors.
432 See "EXTRACTOR ARGUMENTS" for details.
433 Eg: {'youtube': {'skip': ['dash', 'hls']}}
434 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
435 If True (default), DASH manifests and related
436 data will be downloaded and processed by extractor.
437 You can reduce network I/O by disabling it if you don't
438 care about DASH. (only for youtube)
439 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
440 If True (default), HLS manifests and related
441 data will be downloaded and processed by extractor.
442 You can reduce network I/O by disabling it if you don't
443 care about HLS. (only for youtube)
444 """
445
    # Info-dict fields that carry numeric values. (The consumers live outside
    # this chunk; presumably used by the output-template formatter to decide
    # which fields may take numeric format conversions — TODO confirm.)
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. NOTE: the mutable ones ([], set(), {}) would be
    # shared across instances, but __init__ shadows params, _ies, _pps,
    # _printed_messages, _first_webpage_request, _download_retcode,
    # _num_downloads and _screen_file with per-instance values.
    # _playlist_level and _playlist_urls are NOT re-assigned in __init__.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
467
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring for keys)
        auto_init: whether to print the debug header and register the default
                   info extractors immediately
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # The boolean indexes the pair: False -> stdout, True -> stderr
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        # User-supplied params override the defaults below
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        # Warn (once per param) about deprecated options; returns whether
        # the deprecated param was actually set.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # final_ext (set by --remux-video/--recode-video) takes precedence
        # over an explicit --merge-output-format
        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # overwrites=None means "default behavior"; drop it so lookups fall
        # through to the default
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Pipe all output through an external bidi renderer
                # (bidiv, falling back to fribidi) via a pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        """Preload the archive, if any is specified"""
        # Reads the download archive into self.archive (one stripped line per
        # entry). Returns False when no archive is configured or the file
        # does not exist yet; missing files are not an error.
        def preload_download_archive(fn):
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; 'when'
        # selects the pipeline stage (see _pps keys)
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
601
602 def warn_if_short_id(self, argv):
603 # short YouTube ID starting with dash?
604 idxs = [
605 i for i, a in enumerate(argv)
606 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
607 if idxs:
608 correct_argv = (
609 ['yt-dlp']
610 + [a for i, a in enumerate(argv) if i not in idxs]
611 + ['--'] + [argv[i] for i in idxs]
612 )
613 self.report_warning(
614 'Long argument string detected. '
615 'Use -- to separate parameters and URLs, like this:\n%s\n' %
616 args_to_str(correct_argv))
617
618 def add_info_extractor(self, ie):
619 """Add an InfoExtractor object to the end of the list."""
620 self._ies.append(ie)
621 if not isinstance(ie, type):
622 self._ies_instances[ie.ie_key()] = ie
623 ie.set_downloader(self)
624
625 def get_info_extractor(self, ie_key):
626 """
627 Get an instance of an IE with name ie_key, it will try to get one from
628 the _ies list, if there's no instance it will create a new one and add
629 it to the extractor list.
630 """
631 ie = self._ies_instances.get(ie_key)
632 if ie is None:
633 ie = get_info_extractor(ie_key)()
634 self.add_info_extractor(ie)
635 return ie
636
637 def add_default_info_extractors(self):
638 """
639 Add the InfoExtractors returned by gen_extractors to the end of the list
640 """
641 for ie in gen_extractor_classes():
642 self.add_info_extractor(ie)
643
644 def add_post_processor(self, pp, when='post_process'):
645 """Add a PostProcessor object to the end of the chain."""
646 self._pps[when].append(pp)
647 pp.set_downloader(self)
648
649 def add_post_hook(self, ph):
650 """Add the post hook"""
651 self._post_hooks.append(ph)
652
653 def add_progress_hook(self, ph):
654 """Add the progress hook (currently only for the file downloader)"""
655 self._progress_hooks.append(ph)
656
657 def _bidi_workaround(self, message):
658 if not hasattr(self, '_output_channel'):
659 return message
660
661 assert hasattr(self, '_output_process')
662 assert isinstance(message, compat_str)
663 line_count = message.count('\n') + 1
664 self._output_process.stdin.write((message + '\n').encode('utf-8'))
665 self._output_process.stdin.flush()
666 res = ''.join(self._output_channel.readline().decode('utf-8')
667 for _ in range(line_count))
668 return res[:-len('\n')]
669
670 def _write_string(self, message, out=None, only_once=False):
671 if only_once:
672 if message in self._printed_messages:
673 return
674 self._printed_messages.add(message)
675 write_string(message, out=out, encoding=self.params.get('encoding'))
676
677 def to_stdout(self, message, skip_eol=False, quiet=False):
678 """Print message to stdout"""
679 if self.params.get('logger'):
680 self.params['logger'].debug(message)
681 elif not quiet or self.params.get('verbose'):
682 self._write_string(
683 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
684 self._err_file if quiet else self._screen_file)
685
686 def to_stderr(self, message, only_once=False):
687 """Print message to stderr"""
688 assert isinstance(message, compat_str)
689 if self.params.get('logger'):
690 self.params['logger'].error(message)
691 else:
692 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
693
694 def to_console_title(self, message):
695 if not self.params.get('consoletitle', False):
696 return
697 if compat_os_name == 'nt':
698 if ctypes.windll.kernel32.GetConsoleWindow():
699 # c_wchar_p() might not be necessary if `message` is
700 # already of type unicode()
701 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
702 elif 'TERM' in os.environ:
703 self._write_string('\033]0;%s\007' % message, self._screen_file)
704
705 def save_console_title(self):
706 if not self.params.get('consoletitle', False):
707 return
708 if self.params.get('simulate', False):
709 return
710 if compat_os_name != 'nt' and 'TERM' in os.environ:
711 # Save the title on stack
712 self._write_string('\033[22;0t', self._screen_file)
713
714 def restore_console_title(self):
715 if not self.params.get('consoletitle', False):
716 return
717 if self.params.get('simulate', False):
718 return
719 if compat_os_name != 'nt' and 'TERM' in os.environ:
720 # Restore the title from stack
721 self._write_string('\033[23;0t', self._screen_file)
722
723 def __enter__(self):
724 self.save_console_title()
725 return self
726
    def __exit__(self, *args):
        # Restore the console title saved by __enter__
        self.restore_console_title()

        # Persist cookies to disk when a cookie file was configured;
        # ignore_discard/ignore_expires keep session and expired cookies too
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
732
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Exceptions that carry their own exc_info (e.g. wrapped
                    # causes) contribute their traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info so DownloadError
            # reports the original cause rather than the wrapper
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure and continue
        self._download_retcode = 1
763
764 def to_screen(self, message, skip_eol=False):
765 """Print message to stdout if not in quiet mode"""
766 self.to_stdout(
767 message, skip_eol, quiet=self.params.get('quiet', False))
768
769 def report_warning(self, message, only_once=False):
770 '''
771 Print the message to stderr, it will be prefixed with 'WARNING:'
772 If stderr is a tty file the 'WARNING:' will be colored
773 '''
774 if self.params.get('logger') is not None:
775 self.params['logger'].warning(message)
776 else:
777 if self.params.get('no_warnings'):
778 return
779 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
780 _msg_header = '\033[0;33mWARNING:\033[0m'
781 else:
782 _msg_header = 'WARNING:'
783 warning_message = '%s %s' % (_msg_header, message)
784 self.to_stderr(warning_message, only_once)
785
786 def report_error(self, message, tb=None):
787 '''
788 Do the same as trouble, but prefixes the message with 'ERROR:', colored
789 in red if stderr is a tty file.
790 '''
791 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
792 _msg_header = '\033[0;31mERROR:\033[0m'
793 else:
794 _msg_header = 'ERROR:'
795 error_message = '%s %s' % (_msg_header, message)
796 self.trouble(error_message, tb)
797
798 def write_debug(self, message, only_once=False):
799 '''Log debug message or Print message to stderr'''
800 if not self.params.get('verbose', False):
801 return
802 message = '[debug] %s' % message
803 if self.params.get('logger'):
804 self.params['logger'].debug(message)
805 else:
806 self.to_stderr(message, only_once)
807
808 def report_file_already_downloaded(self, file_name):
809 """Report file has already been fully downloaded."""
810 try:
811 self.to_screen('[download] %s has already been downloaded' % file_name)
812 except UnicodeEncodeError:
813 self.to_screen('[download] The file has already been downloaded')
814
815 def report_file_delete(self, file_name):
816 """Report that existing file will be deleted."""
817 try:
818 self.to_screen('Deleting existing file %s' % file_name)
819 except UnicodeEncodeError:
820 self.to_screen('Deleting existing file')
821
822 def parse_outtmpl(self):
823 outtmpl_dict = self.params.get('outtmpl', {})
824 if not isinstance(outtmpl_dict, dict):
825 outtmpl_dict = {'default': outtmpl_dict}
826 outtmpl_dict.update({
827 k: v for k, v in DEFAULT_OUTTMPL.items()
828 if not outtmpl_dict.get(k)})
829 for key, val in outtmpl_dict.items():
830 if isinstance(val, bytes):
831 self.report_warning(
832 'Parameter outtmpl is bytes, but should be a unicode string. '
833 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
834 return outtmpl_dict
835
836 def get_output_path(self, dir_type='', filename=None):
837 paths = self.params.get('paths', {})
838 assert isinstance(paths, dict)
839 path = os.path.join(
840 expand_path(paths.get('home', '').strip()),
841 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
842 filename or '')
843
844 # Temporary fix for #4787
845 # 'Treat' all problem characters by passing filename through preferredencoding
846 # to workaround encoding issues with subprocess on python2 @ Windows
847 if sys.version_info < (3, 0) and sys.platform == 'win32':
848 path = encodeFilename(path, True).decode(preferredencoding())
849 return sanitize_path(path, force=self.params.get('windowsfilenames'))
850
851 @staticmethod
852 def _outtmpl_expandpath(outtmpl):
853 # expand_path translates '%%' into '%' and '$$' into '$'
854 # correspondingly that is not what we want since we need to keep
855 # '%%' intact for template dict substitution step. Working around
856 # with boundary-alike separator hack.
857 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
858 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
859
860 # outtmpl should be expand_path'ed before template dict substitution
861 # because meta fields may contain env variables we don't want to
862 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
863 # title "Hello $PATH", we don't want `$PATH` to be expanded.
864 return expand_path(outtmpl).replace(sep, '')
865
866 @staticmethod
867 def escape_outtmpl(outtmpl):
868 ''' Escape any remaining strings like %s, %abc% etc. '''
869 return re.sub(
870 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
871 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
872 outtmpl)
873
874 @classmethod
875 def validate_outtmpl(cls, outtmpl):
876 ''' @return None or Exception object '''
877 outtmpl = re.sub(
878 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
879 lambda mobj: f'{mobj.group(0)[:-1]}s',
880 cls._outtmpl_expandpath(outtmpl))
881 try:
882 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
883 return None
884 except ValueError as err:
885 return err
886
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
        # Work on a shallow copy so the caller's dict is not mutated
        info_dict = dict(info_dict)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['epoch'] = int(time.time())
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps the mangled keys produced by create_key to their final values;
        # returned to the caller for the actual % substitution
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the decimal part below is unescaped, so it
        # matches any character, not just a decimal point — likely meant r'\.'
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Dotted-path lookup into info_dict (e.g. 'formats.0.url')
        get_key = lambda k: traverse_obj(
            info_dict, k.split('.'), is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Resolve one parsed field expression (groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = get_key(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternate between consuming an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand was not numeric: treat it as another field name
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        def create_key(outer_mobj):
            # Callback for EXTERNAL_FORMAT_RE.sub: evaluates the field and
            # rewrites the template to reference a mangled key in TMPL_DICT
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'

            prefix = outer_mobj.group('prefix')
            key = outer_mobj.group('key')
            original_fmt = fmt = outer_mobj.group('format')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':
                value, fmt = ', '.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':
                value, fmt = json.dumps(value), str_fmt
            elif fmt[-1] == 'q':
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'c':
                value = str(value)
                # NOTE(review): str(value) can never be None ('None' at worst),
                # so this branch looks unreachable; the None check probably
                # belongs before the str() conversion — confirm intent
                if value is None:
                    value, fmt = default, 's'
                else:
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)

            # Mangle the key ('%' -> '%\0') so escape_outtmpl leaves it intact
            key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
            TMPL_DICT[key] = value
            return f'{prefix}%({key}){fmt}'

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1017
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Expand the output template of the given type with info_dict.

        Returns the expanded filename, or None if the template was invalid
        (the error is reported, not raised).
        """
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            # Fall back to the 'default' template when the requested type has none
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
            outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
            filename = outtmpl % template_dict

            # Some template types imply a fixed extension (e.g. description/infojson)
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # Truncate the stem but keep (sub-)extension(s) intact
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1047
1048 def prepare_filename(self, info_dict, dir_type='', warn=False):
1049 """Generate the output filename."""
1050
1051 filename = self._prepare_filename(info_dict, dir_type or 'default')
1052
1053 if warn:
1054 if not self.params.get('paths'):
1055 pass
1056 elif filename == '-':
1057 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1058 elif os.path.isabs(filename):
1059 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1060 self.__prepare_filename_warned = True
1061 if filename == '-' or not filename:
1062 return filename
1063
1064 return self.get_output_path(dir_type, filename)
1065
1066 def _match_entry(self, info_dict, incomplete=False, silent=False):
1067 """ Returns None if the file should be downloaded """
1068
1069 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1070
1071 def check_filter():
1072 if 'title' in info_dict:
1073 # This can happen when we're just evaluating the playlist
1074 title = info_dict['title']
1075 matchtitle = self.params.get('matchtitle', False)
1076 if matchtitle:
1077 if not re.search(matchtitle, title, re.IGNORECASE):
1078 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1079 rejecttitle = self.params.get('rejecttitle', False)
1080 if rejecttitle:
1081 if re.search(rejecttitle, title, re.IGNORECASE):
1082 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1083 date = info_dict.get('upload_date')
1084 if date is not None:
1085 dateRange = self.params.get('daterange', DateRange())
1086 if date not in dateRange:
1087 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1088 view_count = info_dict.get('view_count')
1089 if view_count is not None:
1090 min_views = self.params.get('min_views')
1091 if min_views is not None and view_count < min_views:
1092 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1093 max_views = self.params.get('max_views')
1094 if max_views is not None and view_count > max_views:
1095 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1096 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1097 return 'Skipping "%s" because it is age restricted' % video_title
1098
1099 if not incomplete:
1100 match_filter = self.params.get('match_filter')
1101 if match_filter is not None:
1102 ret = match_filter(info_dict)
1103 if ret is not None:
1104 return ret
1105 return None
1106
1107 if self.in_download_archive(info_dict):
1108 reason = '%s has already been recorded in the archive' % video_title
1109 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1110 else:
1111 reason = check_filter()
1112 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1113 if reason is not None:
1114 if not silent:
1115 self.to_screen('[download] ' + reason)
1116 if self.params.get(break_opt, False):
1117 raise break_err()
1118 return reason
1119
1120 @staticmethod
1121 def add_extra_info(info_dict, extra_info):
1122 '''Set the keys from extra_info in info dict if they are missing'''
1123 for key, value in extra_info.items():
1124 info_dict.setdefault(key, value)
1125
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Try to obtain the video id cheaply (without network access) so
            # the download archive can be consulted before full extraction
            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break skips the for-else below and implicitly returns None
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            # for-else: no extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1174
    def __handle_extraction_exceptions(func, handle_all_errors=True):
        # Decorator (note: `func` is the decorated method, there is no `self`
        # at this level) that converts extraction errors into reported errors,
        # honours --ignore-errors, and retries when throttling is detected
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry by re-invoking the wrapper; note that recursion depth
                # grows with each consecutive throttled attempt
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # Control-flow signals, not errors — always propagate
                raise
            except Exception as e:
                if handle_all_errors and self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1200
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the chosen extractor on `url` and optionally resolve the result
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            # Preserve the URL the user originally supplied across redirects
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1219
1220 def add_default_extra_info(self, ie_result, ie, url):
1221 if url is not None:
1222 self.add_extra_info(ie_result, {
1223 'webpage_url': url,
1224 'original_url': url,
1225 'webpage_url_basename': url_basename(url),
1226 })
1227 if ie is not None:
1228 self.add_extra_info(ie_result, {
1229 'extractor': ie.IE_NAME,
1230 'extractor_key': ie.ie_key(),
1231 })
1232
1233 def process_ie_result(self, ie_result, download=True, extra_info={}):
1234 """
1235 Take the result of the ie(may be modified) and resolve all unresolved
1236 references (URLs, playlist items).
1237
1238 It will also download the videos if 'download'.
1239 Returns the resolved ie_result.
1240 """
1241 result_type = ie_result.get('_type', 'video')
1242
1243 if result_type in ('url', 'url_transparent'):
1244 ie_result['url'] = sanitize_url(ie_result['url'])
1245 if ie_result.get('original_url'):
1246 extra_info.setdefault('original_url', ie_result['original_url'])
1247
1248 extract_flat = self.params.get('extract_flat', False)
1249 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1250 or extract_flat is True):
1251 info_copy = ie_result.copy()
1252 self.add_extra_info(info_copy, extra_info)
1253 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1254 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1255 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1256 return ie_result
1257
1258 if result_type == 'video':
1259 self.add_extra_info(ie_result, extra_info)
1260 ie_result = self.process_video_result(ie_result, download=download)
1261 additional_urls = (ie_result or {}).get('additional_urls')
1262 if additional_urls:
1263 # TODO: Improve MetadataFromFieldPP to allow setting a list
1264 if isinstance(additional_urls, compat_str):
1265 additional_urls = [additional_urls]
1266 self.to_screen(
1267 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1268 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1269 ie_result['additional_entries'] = [
1270 self.extract_info(
1271 url, download, extra_info,
1272 force_generic_extractor=self.params.get('force_generic_extractor'))
1273 for url in additional_urls
1274 ]
1275 return ie_result
1276 elif result_type == 'url':
1277 # We have to add extra_info to the results because it may be
1278 # contained in a playlist
1279 return self.extract_info(
1280 ie_result['url'], download,
1281 ie_key=ie_result.get('ie_key'),
1282 extra_info=extra_info)
1283 elif result_type == 'url_transparent':
1284 # Use the information from the embedding page
1285 info = self.extract_info(
1286 ie_result['url'], ie_key=ie_result.get('ie_key'),
1287 extra_info=extra_info, download=False, process=False)
1288
1289 # extract_info may return None when ignoreerrors is enabled and
1290 # extraction failed with an error, don't crash and return early
1291 # in this case
1292 if not info:
1293 return info
1294
1295 force_properties = dict(
1296 (k, v) for k, v in ie_result.items() if v is not None)
1297 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1298 if f in force_properties:
1299 del force_properties[f]
1300 new_result = info.copy()
1301 new_result.update(force_properties)
1302
1303 # Extracted info may not be a video result (i.e.
1304 # info.get('_type', 'video') != video) but rather an url or
1305 # url_transparent. In such cases outer metadata (from ie_result)
1306 # should be propagated to inner one (info). For this to happen
1307 # _type of info should be overridden with url_transparent. This
1308 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1309 if new_result.get('_type') == 'url':
1310 new_result['_type'] = 'url_transparent'
1311
1312 return self.process_ie_result(
1313 new_result, download=download, extra_info=extra_info)
1314 elif result_type in ('playlist', 'multi_video'):
1315 # Protect from infinite recursion due to recursively nested playlists
1316 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1317 webpage_url = ie_result['webpage_url']
1318 if webpage_url in self._playlist_urls:
1319 self.to_screen(
1320 '[download] Skipping already downloaded playlist: %s'
1321 % ie_result.get('title') or ie_result.get('id'))
1322 return
1323
1324 self._playlist_level += 1
1325 self._playlist_urls.add(webpage_url)
1326 self._sanitize_thumbnails(ie_result)
1327 try:
1328 return self.__process_playlist(ie_result, download)
1329 finally:
1330 self._playlist_level -= 1
1331 if not self._playlist_level:
1332 self._playlist_urls.clear()
1333 elif result_type == 'compat_list':
1334 self.report_warning(
1335 'Extractor %s returned a compat_list result. '
1336 'It needs to be updated.' % ie_result.get('extractor'))
1337
1338 def _fixup(r):
1339 self.add_extra_info(
1340 r,
1341 {
1342 'extractor': ie_result['extractor'],
1343 'webpage_url': ie_result['webpage_url'],
1344 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1345 'extractor_key': ie_result['extractor_key'],
1346 }
1347 )
1348 return r
1349 ie_result['entries'] = [
1350 self.process_ie_result(_fixup(r), download, extra_info)
1351 for r in ie_result['entries']
1352 ]
1353 return ie_result
1354 else:
1355 raise Exception('Invalid result type: %s' % result_type)
1356
1357 def _ensure_dir_exists(self, path):
1358 return make_dir(path, self.report_error)
1359
1360 def __process_playlist(self, ie_result, download):
1361 # We process each entry in the playlist
1362 playlist = ie_result.get('title') or ie_result.get('id')
1363 self.to_screen('[download] Downloading playlist: %s' % playlist)
1364
1365 if 'entries' not in ie_result:
1366 raise EntryNotInPlaylist()
1367 incomplete_entries = bool(ie_result.get('requested_entries'))
1368 if incomplete_entries:
1369 def fill_missing_entries(entries, indexes):
1370 ret = [None] * max(*indexes)
1371 for i, entry in zip(indexes, entries):
1372 ret[i - 1] = entry
1373 return ret
1374 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1375
1376 playlist_results = []
1377
1378 playliststart = self.params.get('playliststart', 1)
1379 playlistend = self.params.get('playlistend')
1380 # For backwards compatibility, interpret -1 as whole list
1381 if playlistend == -1:
1382 playlistend = None
1383
1384 playlistitems_str = self.params.get('playlist_items')
1385 playlistitems = None
1386 if playlistitems_str is not None:
1387 def iter_playlistitems(format):
1388 for string_segment in format.split(','):
1389 if '-' in string_segment:
1390 start, end = string_segment.split('-')
1391 for item in range(int(start), int(end) + 1):
1392 yield int(item)
1393 else:
1394 yield int(string_segment)
1395 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1396
1397 ie_entries = ie_result['entries']
1398 msg = (
1399 'Downloading %d videos' if not isinstance(ie_entries, list)
1400 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1401 if not isinstance(ie_entries, (list, PagedList)):
1402 ie_entries = LazyList(ie_entries)
1403
1404 def get_entry(i):
1405 return YoutubeDL.__handle_extraction_exceptions(
1406 lambda self, i: ie_entries[i - 1],
1407 False
1408 )(self, i)
1409
1410 entries = []
1411 for i in playlistitems or itertools.count(playliststart):
1412 if playlistitems is None and playlistend is not None and playlistend < i:
1413 break
1414 entry = None
1415 try:
1416 entry = get_entry(i)
1417 if entry is None:
1418 raise EntryNotInPlaylist()
1419 except (IndexError, EntryNotInPlaylist):
1420 if incomplete_entries:
1421 raise EntryNotInPlaylist()
1422 elif not playlistitems:
1423 break
1424 entries.append(entry)
1425 try:
1426 if entry is not None:
1427 self._match_entry(entry, incomplete=True, silent=True)
1428 except (ExistingVideoReached, RejectedVideoReached):
1429 break
1430 ie_result['entries'] = entries
1431
1432 # Save playlist_index before re-ordering
1433 entries = [
1434 ((playlistitems[i - 1] if playlistitems else i), entry)
1435 for i, entry in enumerate(entries, 1)
1436 if entry is not None]
1437 n_entries = len(entries)
1438
1439 if not playlistitems and (playliststart or playlistend):
1440 playlistitems = list(range(playliststart, playliststart + n_entries))
1441 ie_result['requested_entries'] = playlistitems
1442
1443 if self.params.get('allow_playlist_files', True):
1444 ie_copy = {
1445 'playlist': playlist,
1446 'playlist_id': ie_result.get('id'),
1447 'playlist_title': ie_result.get('title'),
1448 'playlist_uploader': ie_result.get('uploader'),
1449 'playlist_uploader_id': ie_result.get('uploader_id'),
1450 'playlist_index': 0,
1451 }
1452 ie_copy.update(dict(ie_result))
1453
1454 if self.params.get('writeinfojson', False):
1455 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1456 if not self._ensure_dir_exists(encodeFilename(infofn)):
1457 return
1458 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1459 self.to_screen('[info] Playlist metadata is already present')
1460 else:
1461 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1462 try:
1463 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1464 except (OSError, IOError):
1465 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1466
1467 # TODO: This should be passed to ThumbnailsConvertor if necessary
1468 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1469
1470 if self.params.get('writedescription', False):
1471 descfn = self.prepare_filename(ie_copy, 'pl_description')
1472 if not self._ensure_dir_exists(encodeFilename(descfn)):
1473 return
1474 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1475 self.to_screen('[info] Playlist description is already present')
1476 elif ie_result.get('description') is None:
1477 self.report_warning('There\'s no playlist description to write.')
1478 else:
1479 try:
1480 self.to_screen('[info] Writing playlist description to: ' + descfn)
1481 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1482 descfile.write(ie_result['description'])
1483 except (OSError, IOError):
1484 self.report_error('Cannot write playlist description file ' + descfn)
1485 return
1486
1487 if self.params.get('playlistreverse', False):
1488 entries = entries[::-1]
1489 if self.params.get('playlistrandom', False):
1490 random.shuffle(entries)
1491
1492 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1493
1494 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1495 failures = 0
1496 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1497 for i, entry_tuple in enumerate(entries, 1):
1498 playlist_index, entry = entry_tuple
1499 if 'playlist_index' in self.params.get('compat_options', []):
1500 playlist_index = playlistitems[i - 1] if playlistitems else i
1501 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1502 # This __x_forwarded_for_ip thing is a bit ugly but requires
1503 # minimal changes
1504 if x_forwarded_for:
1505 entry['__x_forwarded_for_ip'] = x_forwarded_for
1506 extra = {
1507 'n_entries': n_entries,
1508 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1509 'playlist_index': playlist_index,
1510 'playlist_autonumber': i,
1511 'playlist': playlist,
1512 'playlist_id': ie_result.get('id'),
1513 'playlist_title': ie_result.get('title'),
1514 'playlist_uploader': ie_result.get('uploader'),
1515 'playlist_uploader_id': ie_result.get('uploader_id'),
1516 'extractor': ie_result['extractor'],
1517 'webpage_url': ie_result['webpage_url'],
1518 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1519 'extractor_key': ie_result['extractor_key'],
1520 }
1521
1522 if self._match_entry(entry, incomplete=True) is not None:
1523 continue
1524
1525 entry_result = self.__process_iterable_entry(entry, download, extra)
1526 if not entry_result:
1527 failures += 1
1528 if failures >= max_failures:
1529 self.report_error(
1530 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1531 break
1532 # TODO: skip failed (empty) entries?
1533 playlist_results.append(entry_result)
1534 ie_result['entries'] = playlist_results
1535 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1536 return ie_result
1537
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Process a single playlist entry; the decorator reports extraction
        # errors so one failing entry does not abort the whole playlist
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1542
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        # Numeric comparisons, e.g. 'height<=720' or 'filesize>100M'
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try parsing as a size ('100M', '2KiB', ...)
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # String comparisons, e.g. 'vcodec^=avc1' (optionally negated with '!')
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # Closes over m/op/comparison_value bound above
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # Formats missing the attribute pass only when '?' was given
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1603
1604 def _default_format_spec(self, info_dict, download=True):
1605
1606 def can_merge():
1607 merger = FFmpegMergerPP(self)
1608 return merger.available and merger.can_merge()
1609
1610 prefer_best = (
1611 not self.params.get('simulate', False)
1612 and download
1613 and (
1614 not can_merge()
1615 or info_dict.get('is_live', False)
1616 or self.outtmpl_dict['default'] == '-'))
1617 compat = (
1618 prefer_best
1619 or self.params.get('allow_multiple_audio_streams', False)
1620 or 'format-spec' in self.params.get('compat_opts', []))
1621
1622 return (
1623 'best/bestvideo+bestaudio' if prefer_best
1624 else 'bestvideo*+bestaudio/best' if not compat
1625 else 'bestvideo+bestaudio/best')
1626
    def build_format_selector(self, format_spec):
        """Compile a format selection string (e.g. 'bestvideo*+bestaudio/best')
        into a selector function.

        The returned function takes a ctx dict with keys 'formats' and
        'incomplete_formats' and yields the chosen format dicts. The spec is
        tokenized with the stdlib tokenizer, parsed into a tree of
        FormatSelector nodes, then compiled into nested generator functions.
        Raises SyntaxError (via syntax_error) on a malformed spec.
        """
        def syntax_error(note, start):
            # Build a caret-annotated error pointing at column start[1] of the spec
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node type tags for the FormatSelector parse tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and join them into a single
            # filter expression string (parsed later by _build_format_filter)
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        # Flush any pending joined NAME token before the bracket
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent names/numbers/unused ops into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list.
            # The inside_* flags control which operators terminate this level.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter with no preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two selected formats (each possibly itself a merge)
            # into a single dict carrying 'requested_formats'.
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Keep at most one stream per kind unless multiple streams
                # of that kind were explicitly allowed.
                # NOTE(review): popping from formats_info while enumerating it
                # skips the entry after each removal — confirm whether more
                # than one disallowed entry can occur here in practice.
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            # Choose the merged container: user preference, else the single
            # video's (or audio-only's) extension, else mkv as the safe default
            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            # Only propagate stream-specific fields when they are unambiguous
            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is set, test-download each format to a
            # temp file and yield only the ones that actually work.
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Compile a parse-tree node into a generator function over ctx
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Each side gets its own deep copy so one side's
                    # consumption cannot affect the other
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # Fold all working formats into one merged format,
                        # merging from worst to best
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # Matches b/w[orst|est][video|audio][*][.N] selectors
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Plain token: treat known extensions as ext filters,
                        # everything else as a literal format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Apply the node's [filter] expressions on a copied ctx so the
            # original format list is left intact for sibling selectors
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token push-back (restore_last_token),
            # needed by the recursive parser to un-consume terminators
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1956
1957 def _calc_headers(self, info_dict):
1958 res = std_headers.copy()
1959
1960 add_headers = info_dict.get('http_headers')
1961 if add_headers:
1962 res.update(add_headers)
1963
1964 cookies = self._calc_cookies(info_dict)
1965 if cookies:
1966 res['Cookie'] = cookies
1967
1968 if 'X-Forwarded-For' not in res:
1969 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1970 if x_forwarded_for_ip:
1971 res['X-Forwarded-For'] = x_forwarded_for_ip
1972
1973 return res
1974
1975 def _calc_cookies(self, info_dict):
1976 pr = sanitized_Request(info_dict['url'])
1977 self.cookiejar.add_cookie_header(pr)
1978 return pr.get_header('Cookie')
1979
1980 def _sanitize_thumbnails(self, info_dict):
1981 thumbnails = info_dict.get('thumbnails')
1982 if thumbnails is None:
1983 thumbnail = info_dict.get('thumbnail')
1984 if thumbnail:
1985 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1986 if thumbnails:
1987 thumbnails.sort(key=lambda t: (
1988 t.get('preference') if t.get('preference') is not None else -1,
1989 t.get('width') if t.get('width') is not None else -1,
1990 t.get('height') if t.get('height') is not None else -1,
1991 t.get('id') if t.get('id') is not None else '',
1992 t.get('url')))
1993
1994 def thumbnail_tester():
1995 if self.params.get('check_formats'):
1996 test_all = True
1997 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
1998 else:
1999 test_all = False
2000 to_screen = self.write_debug
2001
2002 def test_thumbnail(t):
2003 if not test_all and not t.get('_test_url'):
2004 return True
2005 to_screen('Testing thumbnail %s' % t['id'])
2006 try:
2007 self.urlopen(HEADRequest(t['url']))
2008 except network_exceptions as err:
2009 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2010 t['id'], t['url'], error_to_compat_str(err)))
2011 return False
2012 return True
2013
2014 return test_thumbnail
2015
2016 for i, t in enumerate(thumbnails):
2017 if t.get('id') is None:
2018 t['id'] = '%d' % i
2019 if t.get('width') and t.get('height'):
2020 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2021 t['url'] = sanitize_url(t['url'])
2022
2023 if self.params.get('check_formats') is not False:
2024 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2025 else:
2026 info_dict['thumbnails'] = thumbnails
2027
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video extractor result and act on it.

        Validates mandatory fields, coerces field types, normalizes
        thumbnails/subtitles/dates/live status, sanitizes the format list,
        then runs format selection and passes each chosen format to
        process_info (when download is True). Returns the mutated info_dict.
        Raises ExtractorError on missing id/title or when no format matches
        (unless ignore_no_formats_error is set).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (or None), warning once each
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' and 'thumbnails' consistent: the last entry of the
        # sorted thumbnails list is used as the single representative URL
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive YYYYMMDD date fields from their timestamp counterparts
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the boolean is_live/was_live fields,
        # deriving whichever side is missing from the other
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                    break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Listing modes print the requested information and stop here
        list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
        if list_only:
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
            if self.params.get('listformats'):
                if not info_dict.get('formats'):
                    raise ExtractorError('No video formats found', expected=True)
                self.list_formats(info_dict)
            if self.params.get('listsubtitles'):
                if 'automatic_captions' in info_dict:
                    self.list_subtitles(
                        info_dict['id'], automatic_captions, 'automatic captions')
                self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2283
2284 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2285 """Select the requested subtitles and their format"""
2286 available_subs = {}
2287 if normal_subtitles and self.params.get('writesubtitles'):
2288 available_subs.update(normal_subtitles)
2289 if automatic_captions and self.params.get('writeautomaticsub'):
2290 for lang, cap_info in automatic_captions.items():
2291 if lang not in available_subs:
2292 available_subs[lang] = cap_info
2293
2294 if (not self.params.get('writesubtitles') and not
2295 self.params.get('writeautomaticsub') or not
2296 available_subs):
2297 return None
2298
2299 all_sub_langs = available_subs.keys()
2300 if self.params.get('allsubtitles', False):
2301 requested_langs = all_sub_langs
2302 elif self.params.get('subtitleslangs', False):
2303 requested_langs = set()
2304 for lang in self.params.get('subtitleslangs'):
2305 if lang == 'all':
2306 requested_langs.update(all_sub_langs)
2307 continue
2308 discard = lang[0] == '-'
2309 if discard:
2310 lang = lang[1:]
2311 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2312 if discard:
2313 for lang in current_langs:
2314 requested_langs.discard(lang)
2315 else:
2316 requested_langs.update(current_langs)
2317 elif 'en' in available_subs:
2318 requested_langs = ['en']
2319 else:
2320 requested_langs = [list(all_sub_langs)[0]]
2321 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2322
2323 formats_query = self.params.get('subtitlesformat', 'best')
2324 formats_preference = formats_query.split('/') if formats_query else []
2325 subs = {}
2326 for lang in requested_langs:
2327 formats = available_subs.get(lang)
2328 if formats is None:
2329 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2330 continue
2331 for ext in formats_preference:
2332 if ext == 'best':
2333 f = formats[-1]
2334 break
2335 matches = list(filter(lambda f: f['ext'] == ext, formats))
2336 if matches:
2337 f = matches[-1]
2338 break
2339 else:
2340 f = formats[-1]
2341 self.report_warning(
2342 'No subtitle format found matching "%s" for language %s, '
2343 'using %s' % (formats_query, lang, f['ext']))
2344 subs[lang] = f
2345 return subs
2346
2347 def __forced_printings(self, info_dict, filename, incomplete):
2348 def print_mandatory(field, actual_field=None):
2349 if actual_field is None:
2350 actual_field = field
2351 if (self.params.get('force%s' % field, False)
2352 and (not incomplete or info_dict.get(actual_field) is not None)):
2353 self.to_stdout(info_dict[actual_field])
2354
2355 def print_optional(field):
2356 if (self.params.get('force%s' % field, False)
2357 and info_dict.get(field) is not None):
2358 self.to_stdout(info_dict[field])
2359
2360 info_dict = info_dict.copy()
2361 if filename is not None:
2362 info_dict['filename'] = filename
2363 if info_dict.get('requested_formats') is not None:
2364 # For RTMP URLs, also include the playpath
2365 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2366 elif 'url' in info_dict:
2367 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2368
2369 for tmpl in self.params.get('forceprint', []):
2370 if re.match(r'\w+$', tmpl):
2371 tmpl = '%({})s'.format(tmpl)
2372 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2373 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2374
2375 print_mandatory('title')
2376 print_mandatory('id')
2377 print_mandatory('url', 'urls')
2378 print_optional('thumbnail')
2379 print_optional('description')
2380 print_optional('filename')
2381 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2382 self.to_stdout(formatSeconds(info_dict['duration']))
2383 print_mandatory('format')
2384
2385 if self.params.get('forcejson', False):
2386 self.post_extract(info_dict)
2387 self.to_stdout(json.dumps(info_dict, default=repr))
2388
2389 def dl(self, name, info, subtitle=False, test=False):
2390
2391 if test:
2392 verbose = self.params.get('verbose')
2393 params = {
2394 'test': True,
2395 'quiet': not verbose,
2396 'verbose': verbose,
2397 'noprogress': not verbose,
2398 'nopart': True,
2399 'skip_unavailable_fragments': False,
2400 'keep_fragments': False,
2401 'overwrites': True,
2402 '_no_ytdl_file': True,
2403 }
2404 else:
2405 params = self.params
2406 fd = get_suitable_downloader(info, params)(self, params)
2407 if not test:
2408 for ph in self._progress_hooks:
2409 fd.add_progress_hook(ph)
2410 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2411 self.write_debug('Invoking downloader on "%s"' % urls)
2412 new_info = dict(info)
2413 if new_info.get('http_headers') is None:
2414 new_info['http_headers'] = self._calc_headers(new_info)
2415 return fd.download(name, new_info, subtitle)
2416
2417 def process_info(self, info_dict):
2418 """Process a single resolved IE result."""
2419
2420 assert info_dict.get('_type', 'video') == 'video'
2421
2422 info_dict.setdefault('__postprocessors', [])
2423
2424 max_downloads = self.params.get('max_downloads')
2425 if max_downloads is not None:
2426 if self._num_downloads >= int(max_downloads):
2427 raise MaxDownloadsReached()
2428
2429 # TODO: backward compatibility, to be removed
2430 info_dict['fulltitle'] = info_dict['title']
2431
2432 if 'format' not in info_dict and 'ext' in info_dict:
2433 info_dict['format'] = info_dict['ext']
2434
2435 if self._match_entry(info_dict) is not None:
2436 return
2437
2438 self.post_extract(info_dict)
2439 self._num_downloads += 1
2440
2441 # info_dict['_filename'] needs to be set for backward compatibility
2442 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2443 temp_filename = self.prepare_filename(info_dict, 'temp')
2444 files_to_move = {}
2445
2446 # Forced printings
2447 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2448
2449 if self.params.get('simulate', False):
2450 if self.params.get('force_write_download_archive', False):
2451 self.record_download_archive(info_dict)
2452
2453 # Do nothing else if in simulate mode
2454 return
2455
2456 if full_filename is None:
2457 return
2458
2459 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2460 return
2461 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2462 return
2463
2464 if self.params.get('writedescription', False):
2465 descfn = self.prepare_filename(info_dict, 'description')
2466 if not self._ensure_dir_exists(encodeFilename(descfn)):
2467 return
2468 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2469 self.to_screen('[info] Video description is already present')
2470 elif info_dict.get('description') is None:
2471 self.report_warning('There\'s no description to write.')
2472 else:
2473 try:
2474 self.to_screen('[info] Writing video description to: ' + descfn)
2475 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2476 descfile.write(info_dict['description'])
2477 except (OSError, IOError):
2478 self.report_error('Cannot write description file ' + descfn)
2479 return
2480
2481 if self.params.get('writeannotations', False):
2482 annofn = self.prepare_filename(info_dict, 'annotation')
2483 if not self._ensure_dir_exists(encodeFilename(annofn)):
2484 return
2485 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2486 self.to_screen('[info] Video annotations are already present')
2487 elif not info_dict.get('annotations'):
2488 self.report_warning('There are no annotations to write.')
2489 else:
2490 try:
2491 self.to_screen('[info] Writing video annotations to: ' + annofn)
2492 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2493 annofile.write(info_dict['annotations'])
2494 except (KeyError, TypeError):
2495 self.report_warning('There are no annotations to write.')
2496 except (OSError, IOError):
2497 self.report_error('Cannot write annotations file: ' + annofn)
2498 return
2499
2500 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2501 self.params.get('writeautomaticsub')])
2502
2503 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2504 # subtitles download errors are already managed as troubles in relevant IE
2505 # that way it will silently go on when used with unsupporting IE
2506 subtitles = info_dict['requested_subtitles']
2507 # ie = self.get_info_extractor(info_dict['extractor_key'])
2508 for sub_lang, sub_info in subtitles.items():
2509 sub_format = sub_info['ext']
2510 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2511 sub_filename_final = subtitles_filename(
2512 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2513 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2514 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2515 sub_info['filepath'] = sub_filename
2516 files_to_move[sub_filename] = sub_filename_final
2517 else:
2518 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2519 if sub_info.get('data') is not None:
2520 try:
2521 # Use newline='' to prevent conversion of newline characters
2522 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2523 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2524 subfile.write(sub_info['data'])
2525 sub_info['filepath'] = sub_filename
2526 files_to_move[sub_filename] = sub_filename_final
2527 except (OSError, IOError):
2528 self.report_error('Cannot write subtitles file ' + sub_filename)
2529 return
2530 else:
2531 try:
2532 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2533 sub_info['filepath'] = sub_filename
2534 files_to_move[sub_filename] = sub_filename_final
2535 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2536 self.report_warning('Unable to download subtitle for "%s": %s' %
2537 (sub_lang, error_to_compat_str(err)))
2538 continue
2539
2540 if self.params.get('writeinfojson', False):
2541 infofn = self.prepare_filename(info_dict, 'infojson')
2542 if not self._ensure_dir_exists(encodeFilename(infofn)):
2543 return
2544 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2545 self.to_screen('[info] Video metadata is already present')
2546 else:
2547 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2548 try:
2549 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2550 except (OSError, IOError):
2551 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2552 return
2553 info_dict['__infojson_filename'] = infofn
2554
2555 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2556 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2557 thumb_filename = replace_extension(
2558 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2559 files_to_move[thumb_filename_temp] = thumb_filename
2560
2561 # Write internet shortcut files
2562 url_link = webloc_link = desktop_link = False
2563 if self.params.get('writelink', False):
2564 if sys.platform == "darwin": # macOS.
2565 webloc_link = True
2566 elif sys.platform.startswith("linux"):
2567 desktop_link = True
2568 else: # if sys.platform in ['win32', 'cygwin']:
2569 url_link = True
2570 if self.params.get('writeurllink', False):
2571 url_link = True
2572 if self.params.get('writewebloclink', False):
2573 webloc_link = True
2574 if self.params.get('writedesktoplink', False):
2575 desktop_link = True
2576
2577 if url_link or webloc_link or desktop_link:
2578 if 'webpage_url' not in info_dict:
2579 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2580 return
2581 ascii_url = iri_to_uri(info_dict['webpage_url'])
2582
2583 def _write_link_file(extension, template, newline, embed_filename):
2584 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2585 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2586 self.to_screen('[info] Internet shortcut is already present')
2587 else:
2588 try:
2589 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2590 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2591 template_vars = {'url': ascii_url}
2592 if embed_filename:
2593 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2594 linkfile.write(template % template_vars)
2595 except (OSError, IOError):
2596 self.report_error('Cannot write internet shortcut ' + linkfn)
2597 return False
2598 return True
2599
2600 if url_link:
2601 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2602 return
2603 if webloc_link:
2604 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2605 return
2606 if desktop_link:
2607 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2608 return
2609
2610 try:
2611 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2612 except PostProcessingError as err:
2613 self.report_error('Preprocessing: %s' % str(err))
2614 return
2615
2616 must_record_download_archive = False
2617 if self.params.get('skip_download', False):
2618 info_dict['filepath'] = temp_filename
2619 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2620 info_dict['__files_to_move'] = files_to_move
2621 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2622 else:
2623 # Download
2624 try:
2625
2626 def existing_file(*filepaths):
2627 ext = info_dict.get('ext')
2628 final_ext = self.params.get('final_ext', ext)
2629 existing_files = []
2630 for file in orderedSet(filepaths):
2631 if final_ext != ext:
2632 converted = replace_extension(file, final_ext, ext)
2633 if os.path.exists(encodeFilename(converted)):
2634 existing_files.append(converted)
2635 if os.path.exists(encodeFilename(file)):
2636 existing_files.append(file)
2637
2638 if not existing_files or self.params.get('overwrites', False):
2639 for file in orderedSet(existing_files):
2640 self.report_file_delete(file)
2641 os.remove(encodeFilename(file))
2642 return None
2643
2644 self.report_file_already_downloaded(existing_files[0])
2645 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2646 return existing_files[0]
2647
2648 success = True
2649 if info_dict.get('requested_formats') is not None:
2650
2651 def compatible_formats(formats):
2652 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2653 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2654 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2655 if len(video_formats) > 2 or len(audio_formats) > 2:
2656 return False
2657
2658 # Check extension
2659 exts = set(format.get('ext') for format in formats)
2660 COMPATIBLE_EXTS = (
2661 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2662 set(('webm',)),
2663 )
2664 for ext_sets in COMPATIBLE_EXTS:
2665 if ext_sets.issuperset(exts):
2666 return True
2667 # TODO: Check acodec/vcodec
2668 return False
2669
2670 requested_formats = info_dict['requested_formats']
2671 old_ext = info_dict['ext']
2672 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2673 info_dict['ext'] = 'mkv'
2674 self.report_warning(
2675 'Requested formats are incompatible for merge and will be merged into mkv.')
2676
2677 def correct_ext(filename):
2678 filename_real_ext = os.path.splitext(filename)[1][1:]
2679 filename_wo_ext = (
2680 os.path.splitext(filename)[0]
2681 if filename_real_ext == old_ext
2682 else filename)
2683 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2684
2685 # Ensure filename always has a correct extension for successful merge
2686 full_filename = correct_ext(full_filename)
2687 temp_filename = correct_ext(temp_filename)
2688 dl_filename = existing_file(full_filename, temp_filename)
2689 info_dict['__real_download'] = False
2690
2691 _protocols = set(determine_protocol(f) for f in requested_formats)
2692 if len(_protocols) == 1:
2693 info_dict['protocol'] = _protocols.pop()
2694 directly_mergable = (
2695 'no-direct-merge' not in self.params.get('compat_opts', [])
2696 and info_dict.get('protocol') is not None # All requested formats have same protocol
2697 and not self.params.get('allow_unplayable_formats')
2698 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2699 if directly_mergable:
2700 info_dict['url'] = requested_formats[0]['url']
2701 # Treat it as a single download
2702 dl_filename = existing_file(full_filename, temp_filename)
2703 if dl_filename is None:
2704 success, real_download = self.dl(temp_filename, info_dict)
2705 info_dict['__real_download'] = real_download
2706 else:
2707 downloaded = []
2708 merger = FFmpegMergerPP(self)
2709 if self.params.get('allow_unplayable_formats'):
2710 self.report_warning(
2711 'You have requested merging of multiple formats '
2712 'while also allowing unplayable formats to be downloaded. '
2713 'The formats won\'t be merged to prevent data corruption.')
2714 elif not merger.available:
2715 self.report_warning(
2716 'You have requested merging of multiple formats but ffmpeg is not installed. '
2717 'The formats won\'t be merged.')
2718
2719 if dl_filename is None:
2720 for f in requested_formats:
2721 new_info = dict(info_dict)
2722 del new_info['requested_formats']
2723 new_info.update(f)
2724 fname = prepend_extension(
2725 self.prepare_filename(new_info, 'temp'),
2726 'f%s' % f['format_id'], new_info['ext'])
2727 if not self._ensure_dir_exists(fname):
2728 return
2729 downloaded.append(fname)
2730 partial_success, real_download = self.dl(fname, new_info)
2731 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2732 success = success and partial_success
2733 if merger.available and not self.params.get('allow_unplayable_formats'):
2734 info_dict['__postprocessors'].append(merger)
2735 info_dict['__files_to_merge'] = downloaded
2736 # Even if there were no downloads, it is being merged only now
2737 info_dict['__real_download'] = True
2738 else:
2739 for file in downloaded:
2740 files_to_move[file] = None
2741 else:
2742 # Just a single file
2743 dl_filename = existing_file(full_filename, temp_filename)
2744 if dl_filename is None:
2745 success, real_download = self.dl(temp_filename, info_dict)
2746 info_dict['__real_download'] = real_download
2747
2748 dl_filename = dl_filename or temp_filename
2749 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2750
2751 except network_exceptions as err:
2752 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2753 return
2754 except (OSError, IOError) as err:
2755 raise UnavailableVideoError(err)
2756 except (ContentTooShortError, ) as err:
2757 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2758 return
2759
2760 if success and full_filename != '-':
2761
2762 def fixup():
2763 do_fixup = True
2764 fixup_policy = self.params.get('fixup')
2765 vid = info_dict['id']
2766
2767 if fixup_policy in ('ignore', 'never'):
2768 return
2769 elif fixup_policy == 'warn':
2770 do_fixup = False
2771 elif fixup_policy != 'force':
2772 assert fixup_policy in ('detect_or_warn', None)
2773 if not info_dict.get('__real_download'):
2774 do_fixup = False
2775
2776 def ffmpeg_fixup(cndn, msg, cls):
2777 if not cndn:
2778 return
2779 if not do_fixup:
2780 self.report_warning(f'{vid}: {msg}')
2781 return
2782 pp = cls(self)
2783 if pp.available:
2784 info_dict['__postprocessors'].append(pp)
2785 else:
2786 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2787
2788 stretched_ratio = info_dict.get('stretched_ratio')
2789 ffmpeg_fixup(
2790 stretched_ratio not in (1, None),
2791 f'Non-uniform pixel ratio {stretched_ratio}',
2792 FFmpegFixupStretchedPP)
2793
2794 ffmpeg_fixup(
2795 (info_dict.get('requested_formats') is None
2796 and info_dict.get('container') == 'm4a_dash'
2797 and info_dict.get('ext') == 'm4a'),
2798 'writing DASH m4a. Only some players support this container',
2799 FFmpegFixupM4aPP)
2800
2801 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2802 if 'protocol' in info_dict else None)
2803 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2804 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2805 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2806
2807 fixup()
2808 try:
2809 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2810 except PostProcessingError as err:
2811 self.report_error('Postprocessing: %s' % str(err))
2812 return
2813 try:
2814 for ph in self._post_hooks:
2815 ph(info_dict['filepath'])
2816 except Exception as err:
2817 self.report_error('post hooks: %s' % str(err))
2818 return
2819 must_record_download_archive = True
2820
2821 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2822 self.record_download_archive(info_dict)
2823 max_downloads = self.params.get('max_downloads')
2824 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2825 raise MaxDownloadsReached()
2826
2827 def download(self, url_list):
2828 """Download a given list of URLs."""
2829 outtmpl = self.outtmpl_dict['default']
2830 if (len(url_list) > 1
2831 and outtmpl != '-'
2832 and '%' not in outtmpl
2833 and self.params.get('max_downloads') != 1):
2834 raise SameFileError(outtmpl)
2835
2836 for url in url_list:
2837 try:
2838 # It also downloads the videos
2839 res = self.extract_info(
2840 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2841 except UnavailableVideoError:
2842 self.report_error('unable to download video')
2843 except MaxDownloadsReached:
2844 self.to_screen('[info] Maximum number of downloaded files reached')
2845 raise
2846 except ExistingVideoReached:
2847 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2848 raise
2849 except RejectedVideoReached:
2850 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2851 raise
2852 else:
2853 if self.params.get('dump_single_json', False):
2854 self.post_extract(res)
2855 self.to_stdout(json.dumps(res, default=repr))
2856
2857 return self._download_retcode
2858
2859 def download_with_info_file(self, info_filename):
2860 with contextlib.closing(fileinput.FileInput(
2861 [info_filename], mode='r',
2862 openhook=fileinput.hook_encoded('utf-8'))) as f:
2863 # FileInput doesn't have a read method, we can't call json.load
2864 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2865 try:
2866 self.process_ie_result(info, download=True)
2867 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2868 webpage_url = info.get('webpage_url')
2869 if webpage_url is not None:
2870 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2871 return self.download([webpage_url])
2872 else:
2873 raise
2874 return self._download_retcode
2875
2876 @staticmethod
2877 def filter_requested_info(info_dict, actually_filter=True):
2878 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2879 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2880 if actually_filter:
2881 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2882 empty_values = (None, {}, [], set(), tuple())
2883 reject = lambda k, v: k not in keep_keys and (
2884 k.startswith('_') or k in remove_keys or v in empty_values)
2885 else:
2886 info_dict['epoch'] = int(time.time())
2887 reject = lambda k, v: k in remove_keys
2888 filter_fn = lambda obj: (
2889 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2890 else obj if not isinstance(obj, dict)
2891 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2892 return filter_fn(info_dict)
2893
2894 def run_pp(self, pp, infodict):
2895 files_to_delete = []
2896 if '__files_to_move' not in infodict:
2897 infodict['__files_to_move'] = {}
2898 files_to_delete, infodict = pp.run(infodict)
2899 if not files_to_delete:
2900 return infodict
2901
2902 if self.params.get('keepvideo', False):
2903 for f in files_to_delete:
2904 infodict['__files_to_move'].setdefault(f, '')
2905 else:
2906 for old_filename in set(files_to_delete):
2907 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2908 try:
2909 os.remove(encodeFilename(old_filename))
2910 except (IOError, OSError):
2911 self.report_warning('Unable to remove downloaded original file')
2912 if old_filename in infodict['__files_to_move']:
2913 del infodict['__files_to_move'][old_filename]
2914 return infodict
2915
2916 @staticmethod
2917 def post_extract(info_dict):
2918 def actual_post_extract(info_dict):
2919 if info_dict.get('_type') in ('playlist', 'multi_video'):
2920 for video_dict in info_dict.get('entries', {}):
2921 actual_post_extract(video_dict or {})
2922 return
2923
2924 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2925 extra = post_extractor().items()
2926 info_dict.update(extra)
2927 info_dict.pop('__post_extractor', None)
2928
2929 original_infodict = info_dict.get('__original_infodict') or {}
2930 original_infodict.update(extra)
2931 original_infodict.pop('__post_extractor', None)
2932
2933 actual_post_extract(info_dict or {})
2934
2935 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2936 info = dict(ie_info)
2937 info['__files_to_move'] = files_to_move or {}
2938 for pp in self._pps[key]:
2939 info = self.run_pp(pp, info)
2940 return info, info.pop('__files_to_move', None)
2941
2942 def post_process(self, filename, ie_info, files_to_move=None):
2943 """Run all the postprocessors on the given file."""
2944 info = dict(ie_info)
2945 info['filepath'] = filename
2946 info['__files_to_move'] = files_to_move or {}
2947
2948 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2949 info = self.run_pp(pp, info)
2950 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2951 del info['__files_to_move']
2952 for pp in self._pps['after_move']:
2953 info = self.run_pp(pp, info)
2954 return info
2955
2956 def _make_archive_id(self, info_dict):
2957 video_id = info_dict.get('id')
2958 if not video_id:
2959 return
2960 # Future-proof against any change in case
2961 # and backwards compatibility with prior versions
2962 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2963 if extractor is None:
2964 url = str_or_none(info_dict.get('url'))
2965 if not url:
2966 return
2967 # Try to find matching extractor for the URL and take its ie_key
2968 for ie in self._ies:
2969 if ie.suitable(url):
2970 extractor = ie.ie_key()
2971 break
2972 else:
2973 return
2974 return '%s %s' % (extractor.lower(), video_id)
2975
2976 def in_download_archive(self, info_dict):
2977 fn = self.params.get('download_archive')
2978 if fn is None:
2979 return False
2980
2981 vid_id = self._make_archive_id(info_dict)
2982 if not vid_id:
2983 return False # Incomplete video information
2984
2985 return vid_id in self.archive
2986
2987 def record_download_archive(self, info_dict):
2988 fn = self.params.get('download_archive')
2989 if fn is None:
2990 return
2991 vid_id = self._make_archive_id(info_dict)
2992 assert vid_id
2993 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2994 archive_file.write(vid_id + '\n')
2995 self.archive.add(vid_id)
2996
2997 @staticmethod
2998 def format_resolution(format, default='unknown'):
2999 if format.get('vcodec') == 'none':
3000 if format.get('acodec') == 'none':
3001 return 'images'
3002 return 'audio only'
3003 if format.get('resolution') is not None:
3004 return format['resolution']
3005 if format.get('width') and format.get('height'):
3006 res = '%dx%d' % (format['width'], format['height'])
3007 elif format.get('height'):
3008 res = '%sp' % format['height']
3009 elif format.get('width'):
3010 res = '%dx?' % format['width']
3011 else:
3012 res = default
3013 return res
3014
    def _format_note(self, fdict):
        """Build the free-form 'note' column for one format dict.

        Used by the legacy (non-table) format listing in list_formats();
        concatenates language, bitrates, codecs, fps and filesize into a
        single comma/space-separated line.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' joins the codec to the bitrate appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            # Audio bitrate known but codec unknown
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3070
    def list_formats(self, info_dict):
        """Print the available formats for a video as a table on stdout."""
        formats = info_dict.get('formats', [info_dict])
        # The new table layout is disabled by the 'list-formats' compat option
        # or by setting listformats_table to False
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('listformats_table', True) is not False)
        if new_format:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    ', '.join(filter(None, (
                        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                    ))),
                # Formats with very low preference are hidden from the listing
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
        else:
            # Legacy four-column layout; the note column comes from _format_note()
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3117
3118 def list_thumbnails(self, info_dict):
3119 thumbnails = list(info_dict.get('thumbnails'))
3120 if not thumbnails:
3121 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3122 return
3123
3124 self.to_screen(
3125 '[info] Thumbnails for %s:' % info_dict['id'])
3126 self.to_stdout(render_table(
3127 ['ID', 'width', 'height', 'URL'],
3128 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3129
3130 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3131 if not subtitles:
3132 self.to_screen('%s has no %s' % (video_id, name))
3133 return
3134 self.to_screen(
3135 'Available %s for %s:' % (name, video_id))
3136
3137 def _row(lang, formats):
3138 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3139 if len(set(names)) == 1:
3140 names = [] if names[0] == 'unknown' else names[:1]
3141 return [lang, ', '.join(names), ', '.join(exts)]
3142
3143 self.to_stdout(render_table(
3144 ['Language', 'Name', 'Formats'],
3145 [_row(lang, formats) for lang, formats in subtitles.items()],
3146 hideEmpty=True))
3147
3148 def urlopen(self, req):
3149 """ Start an HTTP download """
3150 if isinstance(req, compat_basestring):
3151 req = sanitized_Request(req)
3152 return self._opener.open(req, timeout=self._socket_timeout)
3153
    def print_debug_header(self):
        """Write version/environment debug information to the screen (verbose mode only)."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How this copy is being run: frozen exe, zip bundle or plain source
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only; a no-op failure elsewhere
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # NOTE(review): network call to yt-dl.org; only with --call-home
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3243
    def _setup_opener(self):
        """Build the urllib opener (self._opener) with cookie, proxy and security handlers."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3291
3292 def encode(self, s):
3293 if isinstance(s, bytes):
3294 return s # Already encoded
3295
3296 try:
3297 return s.encode(self.get_encoding())
3298 except UnicodeEncodeError as err:
3299 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3300 raise
3301
3302 def get_encoding(self):
3303 encoding = self.params.get('encoding')
3304 if encoding is None:
3305 encoding = preferredencoding()
3306 return encoding
3307
3308 def _write_thumbnails(self, info_dict, filename): # return the extensions
3309 write_all = self.params.get('write_all_thumbnails', False)
3310 thumbnails = []
3311 if write_all or self.params.get('writethumbnail', False):
3312 thumbnails = info_dict.get('thumbnails') or []
3313 multiple = write_all and len(thumbnails) > 1
3314
3315 ret = []
3316 for t in thumbnails[::-1]:
3317 thumb_ext = determine_ext(t['url'], 'jpg')
3318 suffix = '%s.' % t['id'] if multiple else ''
3319 thumb_display_id = '%s ' % t['id'] if multiple else ''
3320 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3321
3322 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3323 ret.append(suffix + thumb_ext)
3324 t['filepath'] = thumb_filename
3325 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3326 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3327 else:
3328 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3329 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3330 try:
3331 uf = self.urlopen(t['url'])
3332 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3333 shutil.copyfileobj(uf, thumbf)
3334 ret.append(suffix + thumb_ext)
3335 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3336 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3337 t['filepath'] = thumb_filename
3338 except network_exceptions as err:
3339 self.report_warning('Unable to download thumbnail "%s": %s' %
3340 (t['url'], error_to_compat_str(err)))
3341 if ret and not write_all:
3342 break
3343 return ret