]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
494c0d33b877650faa109d4f0e7e701f6310c0a1
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_str,
39 compat_tokenize_tokenize,
40 compat_urllib_error,
41 compat_urllib_request,
42 compat_urllib_request_DataHandler,
43 )
44 from .cookies import load_cookies
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 EntryNotInPlaylist,
61 error_to_compat_str,
62 ExistingVideoReached,
63 expand_path,
64 ExtractorError,
65 float_or_none,
66 format_bytes,
67 format_field,
68 STR_FORMAT_RE,
69 formatSeconds,
70 GeoRestrictedError,
71 HEADRequest,
72 int_or_none,
73 iri_to_uri,
74 ISO3166Utils,
75 LazyList,
76 locked_file,
77 make_dir,
78 make_HTTPS_handler,
79 MaxDownloadsReached,
80 network_exceptions,
81 orderedSet,
82 OUTTMPL_TYPES,
83 PagedList,
84 parse_filesize,
85 PerRequestProxyHandler,
86 platform_name,
87 PostProcessingError,
88 preferredencoding,
89 prepend_extension,
90 process_communicate_or_kill,
91 register_socks_protocols,
92 RejectedVideoReached,
93 render_table,
94 replace_extension,
95 SameFileError,
96 sanitize_filename,
97 sanitize_path,
98 sanitize_url,
99 sanitized_Request,
100 std_headers,
101 str_or_none,
102 strftime_or_none,
103 subtitles_filename,
104 ThrottledDownload,
105 to_high_limit_path,
106 traverse_obj,
107 try_get,
108 UnavailableVideoError,
109 url_basename,
110 version_tuple,
111 write_json_file,
112 write_string,
113 YoutubeDLCookieProcessor,
114 YoutubeDLHandler,
115 YoutubeDLRedirectHandler,
116 )
117 from .cache import Cache
118 from .extractor import (
119 gen_extractor_classes,
120 get_info_extractor,
121 _LAZY_LOADER,
122 _PLUGIN_CLASSES
123 )
124 from .extractor.openload import PhantomJSwrapper
125 from .downloader import (
126 get_suitable_downloader,
127 shorten_protocol_name
128 )
129 from .downloader.rtmp import rtmpdump_version
130 from .postprocessor import (
131 get_postprocessor,
132 FFmpegFixupDurationPP,
133 FFmpegFixupM3u8PP,
134 FFmpegFixupM4aPP,
135 FFmpegFixupStretchedPP,
136 FFmpegFixupTimestampPP,
137 FFmpegMergerPP,
138 FFmpegPostProcessor,
139 MoveFilesAfterDownloadPP,
140 )
141 from .version import __version__
142
143 if compat_os_name == 'nt':
144 import ctypes
145
146
147 class YoutubeDL(object):
148 """YoutubeDL class.
149
150 YoutubeDL objects are the ones responsible of downloading the
151 actual video file and writing it to disk if the user has requested
152 it, among some other tasks. In most cases there should be one per
153 program. As, given a video URL, the downloader doesn't know how to
154 extract all the needed information, task that InfoExtractors do, it
155 has to pass the URL to one of them.
156
157 For this, YoutubeDL objects have a method that allows
158 InfoExtractors to be registered in a given order. When it is passed
159 a URL, the YoutubeDL object handles it to the first InfoExtractor it
160 finds that reports being able to handle it. The InfoExtractor extracts
161 all the information about the video or videos the URL refers to, and
162 YoutubeDL process the extracted information, possibly using a File
163 Downloader to download the video.
164
165 YoutubeDL objects accept a lot of parameters. In order not to saturate
166 the object constructor with arguments, it receives a dictionary of
167 options instead. These options are available through the params
168 attribute for the InfoExtractors to use. The YoutubeDL also
169 registers itself as the downloader in charge for the InfoExtractors
170 that are added to it, so this is a "mutual registration".
171
172 Available options:
173
174 username: Username for authentication purposes.
175 password: Password for authentication purposes.
176 videopassword: Password for accessing a video.
177 ap_mso: Adobe Pass multiple-system operator identifier.
178 ap_username: Multiple-system operator account username.
179 ap_password: Multiple-system operator account password.
180 usenetrc: Use netrc for authentication instead.
181 verbose: Print additional info to stdout.
182 quiet: Do not print messages to stdout.
183 no_warnings: Do not print out anything for warnings.
184 forceprint: A list of templates to force print
185 forceurl: Force printing final URL. (Deprecated)
186 forcetitle: Force printing title. (Deprecated)
187 forceid: Force printing ID. (Deprecated)
188 forcethumbnail: Force printing thumbnail URL. (Deprecated)
189 forcedescription: Force printing description. (Deprecated)
190 forcefilename: Force printing final filename. (Deprecated)
191 forceduration: Force printing duration. (Deprecated)
192 forcejson: Force printing info_dict as JSON.
193 dump_single_json: Force printing the info_dict of the whole playlist
194 (or video) as a single JSON line.
195 force_write_download_archive: Force writing download archive regardless
196 of 'skip_download' or 'simulate'.
197 simulate: Do not download the video files.
198 format: Video format code. see "FORMAT SELECTION" for more details.
199 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
200            ignore_no_formats_error: Ignore "No video formats" error. Useful for
201 extracting metadata even if the video is not actually
202 available for download (experimental)
203 format_sort: How to sort the video formats. see "Sorting Formats"
204 for more details.
205 format_sort_force: Force the given format_sort. see "Sorting Formats"
206 for more details.
207 allow_multiple_video_streams: Allow multiple video streams to be merged
208 into a single file
209 allow_multiple_audio_streams: Allow multiple audio streams to be merged
210 into a single file
211 check_formats Whether to test if the formats are downloadable.
212 Can be True (check all), False (check none)
213 or None (check only if requested by extractor)
214 paths: Dictionary of output paths. The allowed keys are 'home'
215 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
216 outtmpl: Dictionary of templates for output names. Allowed keys
217 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
218                        A string is also accepted for backward compatibility
219 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
220 restrictfilenames: Do not allow "&" and spaces in file names
221 trim_file_name: Limit length of filename (extension excluded)
222 windowsfilenames: Force the filenames to be windows compatible
223 ignoreerrors: Do not stop on download errors
224 (Default True when running yt-dlp,
225 but False when directly accessing YoutubeDL class)
226 skip_playlist_after_errors: Number of allowed failures until the rest of
227 the playlist is skipped
228 force_generic_extractor: Force downloader to use the generic extractor
229 overwrites: Overwrite all video and metadata files if True,
230 overwrite only non-video files if None
231 and don't overwrite any file if False
232 playliststart: Playlist item to start at.
233 playlistend: Playlist item to end at.
234 playlist_items: Specific indices of playlist to download.
235 playlistreverse: Download playlist items in reverse order.
236 playlistrandom: Download playlist items in random order.
237 matchtitle: Download only matching titles.
238 rejecttitle: Reject downloads for matching titles.
239 logger: Log messages to a logging.Logger instance.
240 logtostderr: Log messages to stderr instead of stdout.
241 writedescription: Write the video description to a .description file
242 writeinfojson: Write the video description to a .info.json file
243 clean_infojson: Remove private fields from the infojson
244 writecomments: Extract video comments. This will not be written to disk
245 unless writeinfojson is also given
246 writeannotations: Write the video annotations to a .annotations.xml file
247 writethumbnail: Write the thumbnail image to a file
248 allow_playlist_files: Whether to write playlists' description, infojson etc
249 also to disk when using the 'write*' options
250 write_all_thumbnails: Write all thumbnail formats to files
251 writelink: Write an internet shortcut file, depending on the
252 current platform (.url/.webloc/.desktop)
253 writeurllink: Write a Windows internet shortcut file (.url)
254 writewebloclink: Write a macOS internet shortcut file (.webloc)
255 writedesktoplink: Write a Linux internet shortcut file (.desktop)
256 writesubtitles: Write the video subtitles to a file
257 writeautomaticsub: Write the automatically generated subtitles to a file
258    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
259 Downloads all the subtitles of the video
260 (requires writesubtitles or writeautomaticsub)
261 listsubtitles: Lists all available subtitles for the video
262 subtitlesformat: The format code for subtitles
263 subtitleslangs: List of languages of the subtitles to download (can be regex).
264 The list may contain "all" to refer to all the available
265 subtitles. The language can be prefixed with a "-" to
266 exclude it from the requested languages. Eg: ['all', '-live_chat']
267 keepvideo: Keep the video file after post-processing
268 daterange: A DateRange object, download only if the upload_date is in the range.
269 skip_download: Skip the actual download of the video file
270 cachedir: Location of the cache files in the filesystem.
271 False to disable filesystem cache.
272 noplaylist: Download single video instead of a playlist if in doubt.
273 age_limit: An integer representing the user's age in years.
274 Unsuitable videos for the given age are skipped.
275 min_views: An integer representing the minimum view count the video
276 must have in order to not be skipped.
277 Videos without view count information are always
278 downloaded. None for no limit.
279 max_views: An integer representing the maximum view count.
280 Videos that are more popular than that are not
281 downloaded.
282 Videos without view count information are always
283 downloaded. None for no limit.
284 download_archive: File name of a file where all downloads are recorded.
285 Videos already present in the file are not downloaded
286 again.
287 break_on_existing: Stop the download process after attempting to download a
288 file that is in the archive.
289 break_on_reject: Stop the download process when encountering a video that
290 has been filtered out.
291 cookiefile: File name where cookies should be read from and dumped to
292 cookiesfrombrowser: A tuple containing the name of the browser and the profile
293 name/path from where cookies are loaded.
294 Eg: ('chrome', ) or (vivaldi, 'default')
295 nocheckcertificate:Do not verify SSL certificates
296 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
297 At the moment, this is only supported by YouTube.
298 proxy: URL of the proxy server to use
299 geo_verification_proxy: URL of the proxy to use for IP address verification
300 on geo-restricted sites.
301 socket_timeout: Time to wait for unresponsive hosts, in seconds
302 bidi_workaround: Work around buggy terminals without bidirectional text
303                        support, using fribidi
304 debug_printtraffic:Print out sent and received HTTP traffic
305 include_ads: Download ads as well
306 default_search: Prepend this string if an input url is not valid.
307 'auto' for elaborate guessing
308 encoding: Use this encoding instead of the system-specified.
309 extract_flat: Do not resolve URLs, return the immediate result.
310 Pass in 'in_playlist' to only show this behavior for
311 playlist items.
312 postprocessors: A list of dictionaries, each with an entry
313 * key: The name of the postprocessor. See
314 yt_dlp/postprocessor/__init__.py for a list.
315 * when: When to run the postprocessor. Can be one of
316 pre_process|before_dl|post_process|after_move.
317 Assumed to be 'post_process' if not given
318 post_hooks: A list of functions that get called as the final step
319 for each video file, after all postprocessors have been
320 called. The filename will be passed as the only argument.
321 progress_hooks: A list of functions that get called on download
322 progress, with a dictionary with the entries
323 * status: One of "downloading", "error", or "finished".
324 Check this first and ignore unknown values.
325
326 If status is one of "downloading", or "finished", the
327 following properties may also be present:
328 * filename: The final filename (always present)
329 * tmpfilename: The filename we're currently writing to
330 * downloaded_bytes: Bytes on disk
331 * total_bytes: Size of the whole file, None if unknown
332 * total_bytes_estimate: Guess of the eventual file size,
333 None if unavailable.
334 * elapsed: The number of seconds since download started.
335 * eta: The estimated time in seconds, None if unknown
336 * speed: The download speed in bytes/second, None if
337 unknown
338 * fragment_index: The counter of the currently
339 downloaded video fragment.
340 * fragment_count: The number of fragments (= individual
341 files that will be merged)
342
343 Progress hooks are guaranteed to be called at least once
344 (with status "finished") if the download is successful.
345 merge_output_format: Extension to use when merging formats.
346 final_ext: Expected final extension; used to detect when the file was
347 already downloaded and converted. "merge_output_format" is
348 replaced by this extension when given
349 fixup: Automatically correct known faults of the file.
350 One of:
351 - "never": do nothing
352 - "warn": only emit a warning
353 - "detect_or_warn": check whether we can do anything
354 about it, warn otherwise (default)
355 source_address: Client-side IP address to bind to.
356 call_home: Boolean, true iff we are allowed to contact the
357 yt-dlp servers for debugging. (BROKEN)
358 sleep_interval_requests: Number of seconds to sleep between requests
359 during extraction
360 sleep_interval: Number of seconds to sleep before each download when
361 used alone or a lower bound of a range for randomized
362 sleep before each download (minimum possible number
363 of seconds to sleep) when used along with
364 max_sleep_interval.
365 max_sleep_interval:Upper bound of a range for randomized sleep before each
366 download (maximum possible number of seconds to sleep).
367 Must only be used along with sleep_interval.
368 Actual sleep time will be a random float from range
369 [sleep_interval; max_sleep_interval].
370 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
371 listformats: Print an overview of available video formats and exit.
372 list_thumbnails: Print a table of all thumbnails and exit.
373 match_filter: A function that gets called with the info_dict of
374 every video.
375 If it returns a message, the video is ignored.
376 If it returns None, the video is downloaded.
377 match_filter_func in utils.py is one example for this.
378 no_color: Do not emit color codes in output.
379 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
380 HTTP header
381 geo_bypass_country:
382 Two-letter ISO 3166-2 country code that will be used for
383 explicit geographic restriction bypassing via faking
384 X-Forwarded-For HTTP header
385 geo_bypass_ip_block:
386 IP range in CIDR notation that will be used similarly to
387 geo_bypass_country
388
389 The following options determine which downloader is picked:
390 external_downloader: A dictionary of protocol keys and the executable of the
391 external downloader to use for it. The allowed protocols
392 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
393 Set the value to 'native' to use the native downloader
394 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
395 or {'m3u8': 'ffmpeg'} instead.
396 Use the native HLS downloader instead of ffmpeg/avconv
397 if True, otherwise use ffmpeg/avconv if False, otherwise
398 use downloader suggested by extractor if None.
399 compat_opts: Compatibility options. See "Differences in default behavior".
400 The following options do not work when used through the API:
401 filename, abort-on-error, multistreams, no-live-chat,
402 no-playlist-metafiles. Refer __init__.py for their implementation
403
404 The following parameters are not used by YoutubeDL itself, they are used by
405 the downloader (see yt_dlp/downloader/common.py):
406 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
407 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
408 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
409
410 The following options are used by the post processors:
411 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
412 otherwise prefer ffmpeg. (avconv support is deprecated)
413 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
414 to the binary or its containing directory.
415 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
416 and a list of additional command-line arguments for the
417 postprocessor/executable. The dict can also have "PP+EXE" keys
418 which are used when the given exe is used by the given PP.
419 Use 'default' as the name for arguments to passed to all PP
420
421 The following options are used by the extractors:
422 extractor_retries: Number of times to retry for known errors
423 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
424 hls_split_discontinuity: Split HLS playlists to different formats at
425 discontinuities such as ad breaks (default: False)
426 extractor_args: A dictionary of arguments to be passed to the extractors.
427 See "EXTRACTOR ARGUMENTS" for details.
428 Eg: {'youtube': {'skip': ['dash', 'hls']}}
429 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
430 If True (default), DASH manifests and related
431 data will be downloaded and processed by extractor.
432 You can reduce network I/O by disabling it if you don't
433 care about DASH. (only for youtube)
434 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
435 If True (default), HLS manifests and related
436 data will be downloaded and processed by extractor.
437 You can reduce network I/O by disabling it if you don't
438 care about HLS. (only for youtube)
439 """
440
441 _NUMERIC_FIELDS = set((
442 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
443 'timestamp', 'upload_year', 'upload_month', 'upload_day',
444 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
445 'average_rating', 'comment_count', 'age_limit',
446 'start_time', 'end_time',
447 'chapter_number', 'season_number', 'episode_number',
448 'track_number', 'disc_number', 'release_year',
449 'playlist_index',
450 ))
451
452 params = None
453 _ies = []
454 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
455 _printed_messages = set()
456 _first_webpage_request = True
457 _download_retcode = None
458 _num_downloads = None
459 _playlist_level = 0
460 _playlist_urls = set()
461 _screen_file = None
462
463 def __init__(self, params=None, auto_init=True):
464 """Create a FileDownloader object with the given options."""
465 if params is None:
466 params = {}
467 self._ies = []
468 self._ies_instances = {}
469 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
470 self._printed_messages = set()
471 self._first_webpage_request = True
472 self._post_hooks = []
473 self._progress_hooks = []
474 self._download_retcode = 0
475 self._num_downloads = 0
476 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
477 self._err_file = sys.stderr
478 self.params = {
479 # Default parameters
480 'nocheckcertificate': False,
481 }
482 self.params.update(params)
483 self.cache = Cache(self)
484
485 if sys.version_info < (3, 6):
486 self.report_warning(
487 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
488
489 def check_deprecated(param, option, suggestion):
490 if self.params.get(param) is not None:
491 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
492 return True
493 return False
494
495 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
496 if self.params.get('geo_verification_proxy') is None:
497 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
498
499 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
500 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
501 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
502
503 for msg in self.params.get('warnings', []):
504 self.report_warning(msg)
505
506 if self.params.get('final_ext'):
507 if self.params.get('merge_output_format'):
508 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
509 self.params['merge_output_format'] = self.params['final_ext']
510
511 if 'overwrites' in self.params and self.params['overwrites'] is None:
512 del self.params['overwrites']
513
514 if params.get('bidi_workaround', False):
515 try:
516 import pty
517 master, slave = pty.openpty()
518 width = compat_get_terminal_size().columns
519 if width is None:
520 width_args = []
521 else:
522 width_args = ['-w', str(width)]
523 sp_kwargs = dict(
524 stdin=subprocess.PIPE,
525 stdout=slave,
526 stderr=self._err_file)
527 try:
528 self._output_process = subprocess.Popen(
529 ['bidiv'] + width_args, **sp_kwargs
530 )
531 except OSError:
532 self._output_process = subprocess.Popen(
533 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
534 self._output_channel = os.fdopen(master, 'rb')
535 except OSError as ose:
536 if ose.errno == errno.ENOENT:
537 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
538 else:
539 raise
540
541 if (sys.platform != 'win32'
542 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
543 and not params.get('restrictfilenames', False)):
544 # Unicode filesystem API will throw errors (#1474, #13027)
545 self.report_warning(
546 'Assuming --restrict-filenames since file system encoding '
547 'cannot encode all characters. '
548 'Set the LC_ALL environment variable to fix this.')
549 self.params['restrictfilenames'] = True
550
551 self.outtmpl_dict = self.parse_outtmpl()
552
553 # Creating format selector here allows us to catch syntax errors before the extraction
554 self.format_selector = (
555 None if self.params.get('format') is None
556 else self.build_format_selector(self.params['format']))
557
558 self._setup_opener()
559
560 """Preload the archive, if any is specified"""
561 def preload_download_archive(fn):
562 if fn is None:
563 return False
564 self.write_debug('Loading archive file %r\n' % fn)
565 try:
566 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
567 for line in archive_file:
568 self.archive.add(line.strip())
569 except IOError as ioe:
570 if ioe.errno != errno.ENOENT:
571 raise
572 return False
573 return True
574
575 self.archive = set()
576 preload_download_archive(self.params.get('download_archive'))
577
578 if auto_init:
579 self.print_debug_header()
580 self.add_default_info_extractors()
581
582 for pp_def_raw in self.params.get('postprocessors', []):
583 pp_def = dict(pp_def_raw)
584 when = pp_def.pop('when', 'post_process')
585 pp_class = get_postprocessor(pp_def.pop('key'))
586 pp = pp_class(self, **compat_kwargs(pp_def))
587 self.add_post_processor(pp, when=when)
588
589 for ph in self.params.get('post_hooks', []):
590 self.add_post_hook(ph)
591
592 for ph in self.params.get('progress_hooks', []):
593 self.add_progress_hook(ph)
594
595 register_socks_protocols()
596
597 def warn_if_short_id(self, argv):
598 # short YouTube ID starting with dash?
599 idxs = [
600 i for i, a in enumerate(argv)
601 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
602 if idxs:
603 correct_argv = (
604 ['yt-dlp']
605 + [a for i, a in enumerate(argv) if i not in idxs]
606 + ['--'] + [argv[i] for i in idxs]
607 )
608 self.report_warning(
609 'Long argument string detected. '
610 'Use -- to separate parameters and URLs, like this:\n%s\n' %
611 args_to_str(correct_argv))
612
613 def add_info_extractor(self, ie):
614 """Add an InfoExtractor object to the end of the list."""
615 self._ies.append(ie)
616 if not isinstance(ie, type):
617 self._ies_instances[ie.ie_key()] = ie
618 ie.set_downloader(self)
619
620 def get_info_extractor(self, ie_key):
621 """
622 Get an instance of an IE with name ie_key, it will try to get one from
623 the _ies list, if there's no instance it will create a new one and add
624 it to the extractor list.
625 """
626 ie = self._ies_instances.get(ie_key)
627 if ie is None:
628 ie = get_info_extractor(ie_key)()
629 self.add_info_extractor(ie)
630 return ie
631
632 def add_default_info_extractors(self):
633 """
634 Add the InfoExtractors returned by gen_extractors to the end of the list
635 """
636 for ie in gen_extractor_classes():
637 self.add_info_extractor(ie)
638
639 def add_post_processor(self, pp, when='post_process'):
640 """Add a PostProcessor object to the end of the chain."""
641 self._pps[when].append(pp)
642 pp.set_downloader(self)
643
644 def add_post_hook(self, ph):
645 """Add the post hook"""
646 self._post_hooks.append(ph)
647
648 def add_progress_hook(self, ph):
649 """Add the progress hook (currently only for the file downloader)"""
650 self._progress_hooks.append(ph)
651
652 def _bidi_workaround(self, message):
653 if not hasattr(self, '_output_channel'):
654 return message
655
656 assert hasattr(self, '_output_process')
657 assert isinstance(message, compat_str)
658 line_count = message.count('\n') + 1
659 self._output_process.stdin.write((message + '\n').encode('utf-8'))
660 self._output_process.stdin.flush()
661 res = ''.join(self._output_channel.readline().decode('utf-8')
662 for _ in range(line_count))
663 return res[:-len('\n')]
664
665 def _write_string(self, message, out=None, only_once=False):
666 if only_once:
667 if message in self._printed_messages:
668 return
669 self._printed_messages.add(message)
670 write_string(message, out=out, encoding=self.params.get('encoding'))
671
672 def to_stdout(self, message, skip_eol=False, quiet=False):
673 """Print message to stdout"""
674 if self.params.get('logger'):
675 self.params['logger'].debug(message)
676 elif not quiet or self.params.get('verbose'):
677 self._write_string(
678 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
679 self._err_file if quiet else self._screen_file)
680
681 def to_stderr(self, message, only_once=False):
682 """Print message to stderr"""
683 assert isinstance(message, compat_str)
684 if self.params.get('logger'):
685 self.params['logger'].error(message)
686 else:
687 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
688
689 def to_console_title(self, message):
690 if not self.params.get('consoletitle', False):
691 return
692 if compat_os_name == 'nt':
693 if ctypes.windll.kernel32.GetConsoleWindow():
694 # c_wchar_p() might not be necessary if `message` is
695 # already of type unicode()
696 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
697 elif 'TERM' in os.environ:
698 self._write_string('\033]0;%s\007' % message, self._screen_file)
699
700 def save_console_title(self):
701 if not self.params.get('consoletitle', False):
702 return
703 if self.params.get('simulate', False):
704 return
705 if compat_os_name != 'nt' and 'TERM' in os.environ:
706 # Save the title on stack
707 self._write_string('\033[22;0t', self._screen_file)
708
709 def restore_console_title(self):
710 if not self.params.get('consoletitle', False):
711 return
712 if self.params.get('simulate', False):
713 return
714 if compat_os_name != 'nt' and 'TERM' in os.environ:
715 # Restore the title from stack
716 self._write_string('\033[23;0t', self._screen_file)
717
718 def __enter__(self):
719 self.save_console_title()
720 return self
721
722 def __exit__(self, *args):
723 self.restore_console_title()
724
725 if self.params.get('cookiefile') is not None:
726 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
727
728 def trouble(self, message=None, tb=None):
729 """Determine action to take when a download problem appears.
730
731 Depending on if the downloader has been configured to ignore
732 download errors or not, this method may throw an exception or
733 not when errors are found, after printing the message.
734
735 tb, if given, is additional traceback information.
736 """
737 if message is not None:
738 self.to_stderr(message)
739 if self.params.get('verbose'):
740 if tb is None:
741 if sys.exc_info()[0]: # if .trouble has been called from an except block
742 tb = ''
743 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
744 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
745 tb += encode_compat_str(traceback.format_exc())
746 else:
747 tb_data = traceback.format_list(traceback.extract_stack())
748 tb = ''.join(tb_data)
749 if tb:
750 self.to_stderr(tb)
751 if not self.params.get('ignoreerrors', False):
752 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
753 exc_info = sys.exc_info()[1].exc_info
754 else:
755 exc_info = sys.exc_info()
756 raise DownloadError(message, exc_info)
757 self._download_retcode = 1
758
759 def to_screen(self, message, skip_eol=False):
760 """Print message to stdout if not in quiet mode"""
761 self.to_stdout(
762 message, skip_eol, quiet=self.params.get('quiet', False))
763
764 def report_warning(self, message, only_once=False):
765 '''
766 Print the message to stderr, it will be prefixed with 'WARNING:'
767 If stderr is a tty file the 'WARNING:' will be colored
768 '''
769 if self.params.get('logger') is not None:
770 self.params['logger'].warning(message)
771 else:
772 if self.params.get('no_warnings'):
773 return
774 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
775 _msg_header = '\033[0;33mWARNING:\033[0m'
776 else:
777 _msg_header = 'WARNING:'
778 warning_message = '%s %s' % (_msg_header, message)
779 self.to_stderr(warning_message, only_once)
780
781 def report_error(self, message, tb=None):
782 '''
783 Do the same as trouble, but prefixes the message with 'ERROR:', colored
784 in red if stderr is a tty file.
785 '''
786 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
787 _msg_header = '\033[0;31mERROR:\033[0m'
788 else:
789 _msg_header = 'ERROR:'
790 error_message = '%s %s' % (_msg_header, message)
791 self.trouble(error_message, tb)
792
793 def write_debug(self, message, only_once=False):
794 '''Log debug message or Print message to stderr'''
795 if not self.params.get('verbose', False):
796 return
797 message = '[debug] %s' % message
798 if self.params.get('logger'):
799 self.params['logger'].debug(message)
800 else:
801 self.to_stderr(message, only_once)
802
803 def report_file_already_downloaded(self, file_name):
804 """Report file has already been fully downloaded."""
805 try:
806 self.to_screen('[download] %s has already been downloaded' % file_name)
807 except UnicodeEncodeError:
808 self.to_screen('[download] The file has already been downloaded')
809
810 def report_file_delete(self, file_name):
811 """Report that existing file will be deleted."""
812 try:
813 self.to_screen('Deleting existing file %s' % file_name)
814 except UnicodeEncodeError:
815 self.to_screen('Deleting existing file')
816
817 def parse_outtmpl(self):
818 outtmpl_dict = self.params.get('outtmpl', {})
819 if not isinstance(outtmpl_dict, dict):
820 outtmpl_dict = {'default': outtmpl_dict}
821 outtmpl_dict.update({
822 k: v for k, v in DEFAULT_OUTTMPL.items()
823 if not outtmpl_dict.get(k)})
824 for key, val in outtmpl_dict.items():
825 if isinstance(val, bytes):
826 self.report_warning(
827 'Parameter outtmpl is bytes, but should be a unicode string. '
828 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
829 return outtmpl_dict
830
831 def get_output_path(self, dir_type='', filename=None):
832 paths = self.params.get('paths', {})
833 assert isinstance(paths, dict)
834 path = os.path.join(
835 expand_path(paths.get('home', '').strip()),
836 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
837 filename or '')
838
839 # Temporary fix for #4787
840 # 'Treat' all problem characters by passing filename through preferredencoding
841 # to workaround encoding issues with subprocess on python2 @ Windows
842 if sys.version_info < (3, 0) and sys.platform == 'win32':
843 path = encodeFilename(path, True).decode(preferredencoding())
844 return sanitize_path(path, force=self.params.get('windowsfilenames'))
845
846 @staticmethod
847 def validate_outtmpl(tmpl):
848 ''' @return None or Exception object '''
849 try:
850 re.sub(
851 STR_FORMAT_RE.format(''),
852 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
853 tmpl
854 ) % collections.defaultdict(int)
855 return None
856 except ValueError as err:
857 return err
858
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
        # Work on a copy so the caller's info_dict is not polluted with the
        # synthetic fields added below
        info_dict = dict(info_dict)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['epoch'] = int(time.time())
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps each mangled key (field-spec + '\0' + conversion) to its value
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Full internal key syntax: -field+offset>strf_format|default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Dotted-path lookup into info_dict (string/int/slice sub-keys)
        get_key = lambda k: traverse_obj(
            info_dict, k.split('.'), is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Resolve a parsed field spec (groupdict of INTERNAL_FORMAT_RE),
            # applying negation, arithmetic and strftime formatting in turn
            # Object traversal
            value = get_key(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternately consume an operator and an operand until the
                # maths expression is exhausted
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand is not a number literal; treat it as a field name
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        def create_key(outer_mobj):
            # re.sub callback: rewrite each %(...)X to a mangled key and
            # record the resolved value in TMPL_DICT
            if not outer_mobj.group('has_key'):
                # A lone '%' conversion without a key -- escape it
                return '%{}'.format(outer_mobj.group(0))

            key = outer_mobj.group('key')
            fmt = outer_mobj.group('format')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                # Back-compat: pad playlist_index/autonumber to a fixed width
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            if fmt == 'c':
                value = compat_str(value)
                if value is None:
                    value, fmt = default, 's'
                else:
                    # %c takes a single character
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'
            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), '%ss' % fmt[:-1]
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)
            # '\0' cannot appear in a user template, so the mangled key is unique
            key += '\0%s' % fmt
            TMPL_DICT[key] = value
            return '%({key}){fmt}'.format(key=key, fmt=fmt)

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
979
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Expand the output template of the given type for info_dict.

        Returns the filename string, or None if template expansion failed
        (the error is reported via report_error).
        """
        try:
            # Field-aware sanitizer: id-like fields get the stricter is_id handling
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Some template types (infojson, thumbnails, ...) force an extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # Truncate the stem but keep (sub)extensions intact
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1020
1021 def prepare_filename(self, info_dict, dir_type='', warn=False):
1022 """Generate the output filename."""
1023
1024 filename = self._prepare_filename(info_dict, dir_type or 'default')
1025
1026 if warn:
1027 if not self.params.get('paths'):
1028 pass
1029 elif filename == '-':
1030 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1031 elif os.path.isabs(filename):
1032 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1033 self.__prepare_filename_warned = True
1034 if filename == '-' or not filename:
1035 return filename
1036
1037 return self.get_output_path(dir_type, filename)
1038
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """
        # Otherwise returns a human-readable reason string; may also raise
        # ExistingVideoReached/RejectedVideoReached when break_on_* is set

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Evaluate all user-configured filters; return a rejection
            # reason or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            if not incomplete:
                # match_filter functions only get a complete info_dict
                match_filter = self.params.get('match_filter')
                if match_filter is not None:
                    ret = match_filter(info_dict)
                    if ret is not None:
                        return ret
            return None

        # Archive check takes precedence over the other filters
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                # Abort the whole run if the corresponding break option is set
                raise break_err()
        return reason
1092
1093 @staticmethod
1094 def add_extra_info(info_dict, extra_info):
1095 '''Set the keys from extra_info in info dict if they are missing'''
1096 for key, value in extra_info.items():
1097 info_dict.setdefault(key, value)
1098
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """
        # NOTE(review): extra_info has a mutable default and is mutated
        # downstream (process_ie_result calls setdefault on it) -- confirm
        # this sharing is intentional

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            # Try all registered extractors in order
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                # Try to get the video id cheaply, without a full extraction,
                # so already-archived videos can be skipped early
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break (not return) so the for-else error below is skipped
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            # No extractor claimed the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1147
    def __handle_extraction_exceptions(func, handle_all_errors=True):
        # Decorator applied inside the class body (hence no `self` argument):
        # converts common extraction exceptions into error reports so that a
        # single failed video does not abort the whole run
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                # '\r' erases the partial progress line before the warning
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry by re-entering the wrapper (unbounded retries)
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # These control the outer download loop and must propagate
                raise
            except Exception as e:
                if handle_all_errors and self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1173
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the actual extraction with `ie` and hand the result over to
        # process_ie_result (unless process=False)
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            # Preserve the URL the user originally supplied across redirects
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1192
1193 def add_default_extra_info(self, ie_result, ie, url):
1194 if url is not None:
1195 self.add_extra_info(ie_result, {
1196 'webpage_url': url,
1197 'original_url': url,
1198 'webpage_url_basename': url_basename(url),
1199 })
1200 if ie is not None:
1201 self.add_extra_info(ie_result, {
1202 'extractor': ie.IE_NAME,
1203 'extractor_key': ie.ie_key(),
1204 })
1205
1206 def process_ie_result(self, ie_result, download=True, extra_info={}):
1207 """
1208 Take the result of the ie(may be modified) and resolve all unresolved
1209 references (URLs, playlist items).
1210
1211 It will also download the videos if 'download'.
1212 Returns the resolved ie_result.
1213 """
1214 result_type = ie_result.get('_type', 'video')
1215
1216 if result_type in ('url', 'url_transparent'):
1217 ie_result['url'] = sanitize_url(ie_result['url'])
1218 if ie_result.get('original_url'):
1219 extra_info.setdefault('original_url', ie_result['original_url'])
1220
1221 extract_flat = self.params.get('extract_flat', False)
1222 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1223 or extract_flat is True):
1224 info_copy = ie_result.copy()
1225 self.add_extra_info(info_copy, extra_info)
1226 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1227 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1228 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1229 return ie_result
1230
1231 if result_type == 'video':
1232 self.add_extra_info(ie_result, extra_info)
1233 ie_result = self.process_video_result(ie_result, download=download)
1234 additional_urls = (ie_result or {}).get('additional_urls')
1235 if additional_urls:
1236 # TODO: Improve MetadataFromFieldPP to allow setting a list
1237 if isinstance(additional_urls, compat_str):
1238 additional_urls = [additional_urls]
1239 self.to_screen(
1240 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1241 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1242 ie_result['additional_entries'] = [
1243 self.extract_info(
1244 url, download, extra_info,
1245 force_generic_extractor=self.params.get('force_generic_extractor'))
1246 for url in additional_urls
1247 ]
1248 return ie_result
1249 elif result_type == 'url':
1250 # We have to add extra_info to the results because it may be
1251 # contained in a playlist
1252 return self.extract_info(
1253 ie_result['url'], download,
1254 ie_key=ie_result.get('ie_key'),
1255 extra_info=extra_info)
1256 elif result_type == 'url_transparent':
1257 # Use the information from the embedding page
1258 info = self.extract_info(
1259 ie_result['url'], ie_key=ie_result.get('ie_key'),
1260 extra_info=extra_info, download=False, process=False)
1261
1262 # extract_info may return None when ignoreerrors is enabled and
1263 # extraction failed with an error, don't crash and return early
1264 # in this case
1265 if not info:
1266 return info
1267
1268 force_properties = dict(
1269 (k, v) for k, v in ie_result.items() if v is not None)
1270 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1271 if f in force_properties:
1272 del force_properties[f]
1273 new_result = info.copy()
1274 new_result.update(force_properties)
1275
1276 # Extracted info may not be a video result (i.e.
1277 # info.get('_type', 'video') != video) but rather an url or
1278 # url_transparent. In such cases outer metadata (from ie_result)
1279 # should be propagated to inner one (info). For this to happen
1280 # _type of info should be overridden with url_transparent. This
1281 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1282 if new_result.get('_type') == 'url':
1283 new_result['_type'] = 'url_transparent'
1284
1285 return self.process_ie_result(
1286 new_result, download=download, extra_info=extra_info)
1287 elif result_type in ('playlist', 'multi_video'):
1288 # Protect from infinite recursion due to recursively nested playlists
1289 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1290 webpage_url = ie_result['webpage_url']
1291 if webpage_url in self._playlist_urls:
1292 self.to_screen(
1293 '[download] Skipping already downloaded playlist: %s'
1294 % ie_result.get('title') or ie_result.get('id'))
1295 return
1296
1297 self._playlist_level += 1
1298 self._playlist_urls.add(webpage_url)
1299 self._sanitize_thumbnails(ie_result)
1300 try:
1301 return self.__process_playlist(ie_result, download)
1302 finally:
1303 self._playlist_level -= 1
1304 if not self._playlist_level:
1305 self._playlist_urls.clear()
1306 elif result_type == 'compat_list':
1307 self.report_warning(
1308 'Extractor %s returned a compat_list result. '
1309 'It needs to be updated.' % ie_result.get('extractor'))
1310
1311 def _fixup(r):
1312 self.add_extra_info(
1313 r,
1314 {
1315 'extractor': ie_result['extractor'],
1316 'webpage_url': ie_result['webpage_url'],
1317 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1318 'extractor_key': ie_result['extractor_key'],
1319 }
1320 )
1321 return r
1322 ie_result['entries'] = [
1323 self.process_ie_result(_fixup(r), download, extra_info)
1324 for r in ie_result['entries']
1325 ]
1326 return ie_result
1327 else:
1328 raise Exception('Invalid result type: %s' % result_type)
1329
    def _ensure_dir_exists(self, path):
        # Create the directory for `path` if needed, reporting any failure
        # through self.report_error
        # NOTE(review): exact return semantics come from utils.make_dir -- confirm
        return make_dir(path, self.report_error)
1332
1333 def __process_playlist(self, ie_result, download):
1334 # We process each entry in the playlist
1335 playlist = ie_result.get('title') or ie_result.get('id')
1336 self.to_screen('[download] Downloading playlist: %s' % playlist)
1337
1338 if 'entries' not in ie_result:
1339 raise EntryNotInPlaylist()
1340 incomplete_entries = bool(ie_result.get('requested_entries'))
1341 if incomplete_entries:
1342 def fill_missing_entries(entries, indexes):
1343 ret = [None] * max(*indexes)
1344 for i, entry in zip(indexes, entries):
1345 ret[i - 1] = entry
1346 return ret
1347 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1348
1349 playlist_results = []
1350
1351 playliststart = self.params.get('playliststart', 1)
1352 playlistend = self.params.get('playlistend')
1353 # For backwards compatibility, interpret -1 as whole list
1354 if playlistend == -1:
1355 playlistend = None
1356
1357 playlistitems_str = self.params.get('playlist_items')
1358 playlistitems = None
1359 if playlistitems_str is not None:
1360 def iter_playlistitems(format):
1361 for string_segment in format.split(','):
1362 if '-' in string_segment:
1363 start, end = string_segment.split('-')
1364 for item in range(int(start), int(end) + 1):
1365 yield int(item)
1366 else:
1367 yield int(string_segment)
1368 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1369
1370 ie_entries = ie_result['entries']
1371 msg = (
1372 'Downloading %d videos' if not isinstance(ie_entries, list)
1373 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1374 if not isinstance(ie_entries, (list, PagedList)):
1375 ie_entries = LazyList(ie_entries)
1376
1377 def get_entry(i):
1378 return YoutubeDL.__handle_extraction_exceptions(
1379 lambda self, i: ie_entries[i - 1],
1380 False
1381 )(self, i)
1382
1383 entries = []
1384 for i in playlistitems or itertools.count(playliststart):
1385 if playlistitems is None and playlistend is not None and playlistend < i:
1386 break
1387 entry = None
1388 try:
1389 entry = get_entry(i)
1390 if entry is None:
1391 raise EntryNotInPlaylist()
1392 except (IndexError, EntryNotInPlaylist):
1393 if incomplete_entries:
1394 raise EntryNotInPlaylist()
1395 elif not playlistitems:
1396 break
1397 entries.append(entry)
1398 try:
1399 if entry is not None:
1400 self._match_entry(entry, incomplete=True, silent=True)
1401 except (ExistingVideoReached, RejectedVideoReached):
1402 break
1403 ie_result['entries'] = entries
1404
1405 # Save playlist_index before re-ordering
1406 entries = [
1407 ((playlistitems[i - 1] if playlistitems else i), entry)
1408 for i, entry in enumerate(entries, 1)
1409 if entry is not None]
1410 n_entries = len(entries)
1411
1412 if not playlistitems and (playliststart or playlistend):
1413 playlistitems = list(range(playliststart, playliststart + n_entries))
1414 ie_result['requested_entries'] = playlistitems
1415
1416 if self.params.get('allow_playlist_files', True):
1417 ie_copy = {
1418 'playlist': playlist,
1419 'playlist_id': ie_result.get('id'),
1420 'playlist_title': ie_result.get('title'),
1421 'playlist_uploader': ie_result.get('uploader'),
1422 'playlist_uploader_id': ie_result.get('uploader_id'),
1423 'playlist_index': 0,
1424 }
1425 ie_copy.update(dict(ie_result))
1426
1427 if self.params.get('writeinfojson', False):
1428 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1429 if not self._ensure_dir_exists(encodeFilename(infofn)):
1430 return
1431 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1432 self.to_screen('[info] Playlist metadata is already present')
1433 else:
1434 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1435 try:
1436 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1437 except (OSError, IOError):
1438 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1439
1440 # TODO: This should be passed to ThumbnailsConvertor if necessary
1441 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1442
1443 if self.params.get('writedescription', False):
1444 descfn = self.prepare_filename(ie_copy, 'pl_description')
1445 if not self._ensure_dir_exists(encodeFilename(descfn)):
1446 return
1447 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1448 self.to_screen('[info] Playlist description is already present')
1449 elif ie_result.get('description') is None:
1450 self.report_warning('There\'s no playlist description to write.')
1451 else:
1452 try:
1453 self.to_screen('[info] Writing playlist description to: ' + descfn)
1454 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1455 descfile.write(ie_result['description'])
1456 except (OSError, IOError):
1457 self.report_error('Cannot write playlist description file ' + descfn)
1458 return
1459
1460 if self.params.get('playlistreverse', False):
1461 entries = entries[::-1]
1462 if self.params.get('playlistrandom', False):
1463 random.shuffle(entries)
1464
1465 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1466
1467 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1468 failures = 0
1469 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1470 for i, entry_tuple in enumerate(entries, 1):
1471 playlist_index, entry = entry_tuple
1472 if 'playlist_index' in self.params.get('compat_options', []):
1473 playlist_index = playlistitems[i - 1] if playlistitems else i
1474 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1475 # This __x_forwarded_for_ip thing is a bit ugly but requires
1476 # minimal changes
1477 if x_forwarded_for:
1478 entry['__x_forwarded_for_ip'] = x_forwarded_for
1479 extra = {
1480 'n_entries': n_entries,
1481 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1482 'playlist_index': playlist_index,
1483 'playlist_autonumber': i,
1484 'playlist': playlist,
1485 'playlist_id': ie_result.get('id'),
1486 'playlist_title': ie_result.get('title'),
1487 'playlist_uploader': ie_result.get('uploader'),
1488 'playlist_uploader_id': ie_result.get('uploader_id'),
1489 'extractor': ie_result['extractor'],
1490 'webpage_url': ie_result['webpage_url'],
1491 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1492 'extractor_key': ie_result['extractor_key'],
1493 }
1494
1495 if self._match_entry(entry, incomplete=True) is not None:
1496 continue
1497
1498 entry_result = self.__process_iterable_entry(entry, download, extra)
1499 if not entry_result:
1500 failures += 1
1501 if failures >= max_failures:
1502 self.report_error(
1503 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1504 break
1505 # TODO: skip failed (empty) entries?
1506 playlist_results.append(entry_result)
1507 ie_result['entries'] = playlist_results
1508 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1509 return ie_result
1510
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Thin wrapper so that errors in one playlist entry are handled (and,
        # with ignoreerrors, skipped) by __handle_extraction_exceptions
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1515
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "
        # Two grammars are tried in order: numeric comparisons on known keys
        # (e.g. 'height>=720'), then string comparisons (e.g. 'vcodec^=avc1')

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer; try to parse as a size (e.g. '500K')
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    # A leading '!' inverts the string operator
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # Closure over m/op/comparison_value; applied per format dict.
            # A trailing '?' in the spec (none_inclusive) keeps formats that
            # lack the key
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1576
1577 def _default_format_spec(self, info_dict, download=True):
1578
1579 def can_merge():
1580 merger = FFmpegMergerPP(self)
1581 return merger.available and merger.can_merge()
1582
1583 prefer_best = (
1584 not self.params.get('simulate', False)
1585 and download
1586 and (
1587 not can_merge()
1588 or info_dict.get('is_live', False)
1589 or self.outtmpl_dict['default'] == '-'))
1590 compat = (
1591 prefer_best
1592 or self.params.get('allow_multiple_audio_streams', False)
1593 or 'format-spec' in self.params.get('compat_opts', []))
1594
1595 return (
1596 'best/bestvideo+bestaudio' if prefer_best
1597 else 'bestvideo*+bestaudio/best' if not compat
1598 else 'bestvideo+bestaudio/best')
1599
    def build_format_selector(self, format_spec):
        """Compile the format selection expression *format_spec*.

        Returns a function that takes a ``ctx`` dict (keys: ``formats``,
        ``incomplete_formats``) and yields the selected format dicts
        (synthetic merged dicts for ``+`` expressions). Raises SyntaxError
        for an invalid *format_spec*.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message points a
            # caret at column start[1] of the spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Concatenate the raw token strings up to the matching ']'.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent name/number/op tokens into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser over the token stream; the inside_*
            # flags mark which construct ('+', '/', '(…)') we are nested in.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare filter implicitly applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two formats (each possibly itself a merge carrying
            # 'requested_formats') into one synthetic format dict.
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                # NOTE(review): formats_info is mutated (pop) while being
                # enumerated; each pop makes the iteration skip the element
                # that slides into position i — confirm this is intentional.
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            # Container choice: user preference, else the single video's
            # (or single audio's) container, else mkv as the safe default.
            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is enabled, test-download each format to a
            # temporary file and yield only the ones that actually work.
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Recursively turn the parsed selector tree into a generator
            # function over ctx.
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative yielding any formats wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Deep-copy ctx so each side of '+' selects independently
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one merge, best-first
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # b/w selectors: best/worst, optional video/audio type,
                    # optional '*' modifier, optional '.N' index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Plain extension or a literal format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the bracket filters before delegating to the selector
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with single-token push-back
            # support (restore_last_token), needed by the parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__  # Python 2 compatibility

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1929
1930 def _calc_headers(self, info_dict):
1931 res = std_headers.copy()
1932
1933 add_headers = info_dict.get('http_headers')
1934 if add_headers:
1935 res.update(add_headers)
1936
1937 cookies = self._calc_cookies(info_dict)
1938 if cookies:
1939 res['Cookie'] = cookies
1940
1941 if 'X-Forwarded-For' not in res:
1942 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1943 if x_forwarded_for_ip:
1944 res['X-Forwarded-For'] = x_forwarded_for_ip
1945
1946 return res
1947
1948 def _calc_cookies(self, info_dict):
1949 pr = sanitized_Request(info_dict['url'])
1950 self.cookiejar.add_cookie_header(pr)
1951 return pr.get_header('Cookie')
1952
1953 def _sanitize_thumbnails(self, info_dict):
1954 thumbnails = info_dict.get('thumbnails')
1955 if thumbnails is None:
1956 thumbnail = info_dict.get('thumbnail')
1957 if thumbnail:
1958 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1959 if thumbnails:
1960 thumbnails.sort(key=lambda t: (
1961 t.get('preference') if t.get('preference') is not None else -1,
1962 t.get('width') if t.get('width') is not None else -1,
1963 t.get('height') if t.get('height') is not None else -1,
1964 t.get('id') if t.get('id') is not None else '',
1965 t.get('url')))
1966
1967 def thumbnail_tester():
1968 if self.params.get('check_formats'):
1969 test_all = True
1970 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
1971 else:
1972 test_all = False
1973 to_screen = self.write_debug
1974
1975 def test_thumbnail(t):
1976 if not test_all and not t.get('_test_url'):
1977 return True
1978 to_screen('Testing thumbnail %s' % t['id'])
1979 try:
1980 self.urlopen(HEADRequest(t['url']))
1981 except network_exceptions as err:
1982 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1983 t['id'], t['url'], error_to_compat_str(err)))
1984 return False
1985 return True
1986
1987 return test_thumbnail
1988
1989 for i, t in enumerate(thumbnails):
1990 if t.get('id') is None:
1991 t['id'] = '%d' % i
1992 if t.get('width') and t.get('height'):
1993 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1994 t['url'] = sanitize_url(t['url'])
1995
1996 if self.params.get('check_formats') is not False:
1997 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
1998 else:
1999 info_dict['thumbnails'] = thumbnails
2000
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result and select its formats.

        Fills in derived fields (display_id, dates, live status, chapter/
        season/episode titles), normalizes thumbnails, subtitles and the
        'formats' list, runs format selection and — unless a listing-only
        option is set — hands each selected format to process_info.
        Returns the (mutated) info_dict.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (or None)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Thumbnails are sorted ascending by preference/size, so the
            # last one is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the date fields from the corresponding timestamps if missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the 'is_live'/'was_live' booleans,
        # deriving whichever side is missing from the other
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                # NOTE(review): only bytes URLs are coerced here; other
                # non-str types pass through untouched — confirm intended.
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # format_id -> list of formats sharing that id (for dedup below)
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Listing-only options print the requested tables and return early
        list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
        if list_only:
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
            if self.params.get('listformats'):
                if not info_dict.get('formats'):
                    raise ExtractorError('No video formats found', expected=True)
                self.list_formats(info_dict)
            if self.params.get('listsubtitles'):
                if 'automatic_captions' in info_dict:
                    self.list_subtitles(
                        info_dict['id'], automatic_captions, 'automatic captions')
                self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2256
2257 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2258 """Select the requested subtitles and their format"""
2259 available_subs = {}
2260 if normal_subtitles and self.params.get('writesubtitles'):
2261 available_subs.update(normal_subtitles)
2262 if automatic_captions and self.params.get('writeautomaticsub'):
2263 for lang, cap_info in automatic_captions.items():
2264 if lang not in available_subs:
2265 available_subs[lang] = cap_info
2266
2267 if (not self.params.get('writesubtitles') and not
2268 self.params.get('writeautomaticsub') or not
2269 available_subs):
2270 return None
2271
2272 all_sub_langs = available_subs.keys()
2273 if self.params.get('allsubtitles', False):
2274 requested_langs = all_sub_langs
2275 elif self.params.get('subtitleslangs', False):
2276 requested_langs = set()
2277 for lang in self.params.get('subtitleslangs'):
2278 if lang == 'all':
2279 requested_langs.update(all_sub_langs)
2280 continue
2281 discard = lang[0] == '-'
2282 if discard:
2283 lang = lang[1:]
2284 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2285 if discard:
2286 for lang in current_langs:
2287 requested_langs.discard(lang)
2288 else:
2289 requested_langs.update(current_langs)
2290 elif 'en' in available_subs:
2291 requested_langs = ['en']
2292 else:
2293 requested_langs = [list(all_sub_langs)[0]]
2294 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2295
2296 formats_query = self.params.get('subtitlesformat', 'best')
2297 formats_preference = formats_query.split('/') if formats_query else []
2298 subs = {}
2299 for lang in requested_langs:
2300 formats = available_subs.get(lang)
2301 if formats is None:
2302 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2303 continue
2304 for ext in formats_preference:
2305 if ext == 'best':
2306 f = formats[-1]
2307 break
2308 matches = list(filter(lambda f: f['ext'] == ext, formats))
2309 if matches:
2310 f = matches[-1]
2311 break
2312 else:
2313 f = formats[-1]
2314 self.report_warning(
2315 'No subtitle format found matching "%s" for language %s, '
2316 'using %s' % (formats_query, lang, f['ext']))
2317 subs[lang] = f
2318 return subs
2319
2320 def __forced_printings(self, info_dict, filename, incomplete):
2321 def print_mandatory(field, actual_field=None):
2322 if actual_field is None:
2323 actual_field = field
2324 if (self.params.get('force%s' % field, False)
2325 and (not incomplete or info_dict.get(actual_field) is not None)):
2326 self.to_stdout(info_dict[actual_field])
2327
2328 def print_optional(field):
2329 if (self.params.get('force%s' % field, False)
2330 and info_dict.get(field) is not None):
2331 self.to_stdout(info_dict[field])
2332
2333 info_dict = info_dict.copy()
2334 if filename is not None:
2335 info_dict['filename'] = filename
2336 if info_dict.get('requested_formats') is not None:
2337 # For RTMP URLs, also include the playpath
2338 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2339 elif 'url' in info_dict:
2340 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2341
2342 for tmpl in self.params.get('forceprint', []):
2343 if re.match(r'\w+$', tmpl):
2344 tmpl = '%({})s'.format(tmpl)
2345 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2346 self.to_stdout(tmpl % info_copy)
2347
2348 print_mandatory('title')
2349 print_mandatory('id')
2350 print_mandatory('url', 'urls')
2351 print_optional('thumbnail')
2352 print_optional('description')
2353 print_optional('filename')
2354 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2355 self.to_stdout(formatSeconds(info_dict['duration']))
2356 print_mandatory('format')
2357
2358 if self.params.get('forcejson', False):
2359 self.post_extract(info_dict)
2360 self.to_stdout(json.dumps(info_dict, default=repr))
2361
2362 def dl(self, name, info, subtitle=False, test=False):
2363
2364 if test:
2365 verbose = self.params.get('verbose')
2366 params = {
2367 'test': True,
2368 'quiet': not verbose,
2369 'verbose': verbose,
2370 'noprogress': not verbose,
2371 'nopart': True,
2372 'skip_unavailable_fragments': False,
2373 'keep_fragments': False,
2374 'overwrites': True,
2375 '_no_ytdl_file': True,
2376 }
2377 else:
2378 params = self.params
2379 fd = get_suitable_downloader(info, params)(self, params)
2380 if not test:
2381 for ph in self._progress_hooks:
2382 fd.add_progress_hook(ph)
2383 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2384 self.write_debug('Invoking downloader on "%s"' % urls)
2385 new_info = dict(info)
2386 if new_info.get('http_headers') is None:
2387 new_info['http_headers'] = self._calc_headers(new_info)
2388 return fd.download(name, new_info, subtitle)
2389
2390 def process_info(self, info_dict):
2391 """Process a single resolved IE result."""
2392
2393 assert info_dict.get('_type', 'video') == 'video'
2394
2395 info_dict.setdefault('__postprocessors', [])
2396
2397 max_downloads = self.params.get('max_downloads')
2398 if max_downloads is not None:
2399 if self._num_downloads >= int(max_downloads):
2400 raise MaxDownloadsReached()
2401
2402 # TODO: backward compatibility, to be removed
2403 info_dict['fulltitle'] = info_dict['title']
2404
2405 if 'format' not in info_dict and 'ext' in info_dict:
2406 info_dict['format'] = info_dict['ext']
2407
2408 if self._match_entry(info_dict) is not None:
2409 return
2410
2411 self.post_extract(info_dict)
2412 self._num_downloads += 1
2413
2414 # info_dict['_filename'] needs to be set for backward compatibility
2415 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2416 temp_filename = self.prepare_filename(info_dict, 'temp')
2417 files_to_move = {}
2418
2419 # Forced printings
2420 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2421
2422 if self.params.get('simulate', False):
2423 if self.params.get('force_write_download_archive', False):
2424 self.record_download_archive(info_dict)
2425
2426 # Do nothing else if in simulate mode
2427 return
2428
2429 if full_filename is None:
2430 return
2431
2432 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2433 return
2434 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2435 return
2436
2437 if self.params.get('writedescription', False):
2438 descfn = self.prepare_filename(info_dict, 'description')
2439 if not self._ensure_dir_exists(encodeFilename(descfn)):
2440 return
2441 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2442 self.to_screen('[info] Video description is already present')
2443 elif info_dict.get('description') is None:
2444 self.report_warning('There\'s no description to write.')
2445 else:
2446 try:
2447 self.to_screen('[info] Writing video description to: ' + descfn)
2448 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2449 descfile.write(info_dict['description'])
2450 except (OSError, IOError):
2451 self.report_error('Cannot write description file ' + descfn)
2452 return
2453
2454 if self.params.get('writeannotations', False):
2455 annofn = self.prepare_filename(info_dict, 'annotation')
2456 if not self._ensure_dir_exists(encodeFilename(annofn)):
2457 return
2458 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2459 self.to_screen('[info] Video annotations are already present')
2460 elif not info_dict.get('annotations'):
2461 self.report_warning('There are no annotations to write.')
2462 else:
2463 try:
2464 self.to_screen('[info] Writing video annotations to: ' + annofn)
2465 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2466 annofile.write(info_dict['annotations'])
2467 except (KeyError, TypeError):
2468 self.report_warning('There are no annotations to write.')
2469 except (OSError, IOError):
2470 self.report_error('Cannot write annotations file: ' + annofn)
2471 return
2472
2473 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2474 self.params.get('writeautomaticsub')])
2475
2476 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2477 # subtitles download errors are already managed as troubles in relevant IE
2478 # that way it will silently go on when used with unsupporting IE
2479 subtitles = info_dict['requested_subtitles']
2480 # ie = self.get_info_extractor(info_dict['extractor_key'])
2481 for sub_lang, sub_info in subtitles.items():
2482 sub_format = sub_info['ext']
2483 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2484 sub_filename_final = subtitles_filename(
2485 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2486 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2487 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2488 sub_info['filepath'] = sub_filename
2489 files_to_move[sub_filename] = sub_filename_final
2490 else:
2491 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2492 if sub_info.get('data') is not None:
2493 try:
2494 # Use newline='' to prevent conversion of newline characters
2495 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2496 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2497 subfile.write(sub_info['data'])
2498 sub_info['filepath'] = sub_filename
2499 files_to_move[sub_filename] = sub_filename_final
2500 except (OSError, IOError):
2501 self.report_error('Cannot write subtitles file ' + sub_filename)
2502 return
2503 else:
2504 try:
2505 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2506 sub_info['filepath'] = sub_filename
2507 files_to_move[sub_filename] = sub_filename_final
2508 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2509 self.report_warning('Unable to download subtitle for "%s": %s' %
2510 (sub_lang, error_to_compat_str(err)))
2511 continue
2512
2513 if self.params.get('writeinfojson', False):
2514 infofn = self.prepare_filename(info_dict, 'infojson')
2515 if not self._ensure_dir_exists(encodeFilename(infofn)):
2516 return
2517 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2518 self.to_screen('[info] Video metadata is already present')
2519 else:
2520 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2521 try:
2522 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2523 except (OSError, IOError):
2524 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2525 return
2526 info_dict['__infojson_filename'] = infofn
2527
2528 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2529 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2530 thumb_filename = replace_extension(
2531 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2532 files_to_move[thumb_filename_temp] = thumb_filename
2533
2534 # Write internet shortcut files
2535 url_link = webloc_link = desktop_link = False
2536 if self.params.get('writelink', False):
2537 if sys.platform == "darwin": # macOS.
2538 webloc_link = True
2539 elif sys.platform.startswith("linux"):
2540 desktop_link = True
2541 else: # if sys.platform in ['win32', 'cygwin']:
2542 url_link = True
2543 if self.params.get('writeurllink', False):
2544 url_link = True
2545 if self.params.get('writewebloclink', False):
2546 webloc_link = True
2547 if self.params.get('writedesktoplink', False):
2548 desktop_link = True
2549
2550 if url_link or webloc_link or desktop_link:
2551 if 'webpage_url' not in info_dict:
2552 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2553 return
2554 ascii_url = iri_to_uri(info_dict['webpage_url'])
2555
2556 def _write_link_file(extension, template, newline, embed_filename):
2557 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2558 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2559 self.to_screen('[info] Internet shortcut is already present')
2560 else:
2561 try:
2562 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2563 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2564 template_vars = {'url': ascii_url}
2565 if embed_filename:
2566 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2567 linkfile.write(template % template_vars)
2568 except (OSError, IOError):
2569 self.report_error('Cannot write internet shortcut ' + linkfn)
2570 return False
2571 return True
2572
2573 if url_link:
2574 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2575 return
2576 if webloc_link:
2577 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2578 return
2579 if desktop_link:
2580 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2581 return
2582
2583 try:
2584 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2585 except PostProcessingError as err:
2586 self.report_error('Preprocessing: %s' % str(err))
2587 return
2588
2589 must_record_download_archive = False
2590 if self.params.get('skip_download', False):
2591 info_dict['filepath'] = temp_filename
2592 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2593 info_dict['__files_to_move'] = files_to_move
2594 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2595 else:
2596 # Download
2597 try:
2598
2599 def existing_file(*filepaths):
2600 ext = info_dict.get('ext')
2601 final_ext = self.params.get('final_ext', ext)
2602 existing_files = []
2603 for file in orderedSet(filepaths):
2604 if final_ext != ext:
2605 converted = replace_extension(file, final_ext, ext)
2606 if os.path.exists(encodeFilename(converted)):
2607 existing_files.append(converted)
2608 if os.path.exists(encodeFilename(file)):
2609 existing_files.append(file)
2610
2611 if not existing_files or self.params.get('overwrites', False):
2612 for file in orderedSet(existing_files):
2613 self.report_file_delete(file)
2614 os.remove(encodeFilename(file))
2615 return None
2616
2617 self.report_file_already_downloaded(existing_files[0])
2618 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2619 return existing_files[0]
2620
2621 success = True
2622 if info_dict.get('requested_formats') is not None:
2623
2624 def compatible_formats(formats):
2625 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2626 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2627 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2628 if len(video_formats) > 2 or len(audio_formats) > 2:
2629 return False
2630
2631 # Check extension
2632 exts = set(format.get('ext') for format in formats)
2633 COMPATIBLE_EXTS = (
2634 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2635 set(('webm',)),
2636 )
2637 for ext_sets in COMPATIBLE_EXTS:
2638 if ext_sets.issuperset(exts):
2639 return True
2640 # TODO: Check acodec/vcodec
2641 return False
2642
2643 requested_formats = info_dict['requested_formats']
2644 old_ext = info_dict['ext']
2645 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2646 info_dict['ext'] = 'mkv'
2647 self.report_warning(
2648 'Requested formats are incompatible for merge and will be merged into mkv.')
2649
2650 def correct_ext(filename):
2651 filename_real_ext = os.path.splitext(filename)[1][1:]
2652 filename_wo_ext = (
2653 os.path.splitext(filename)[0]
2654 if filename_real_ext == old_ext
2655 else filename)
2656 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2657
2658 # Ensure filename always has a correct extension for successful merge
2659 full_filename = correct_ext(full_filename)
2660 temp_filename = correct_ext(temp_filename)
2661 dl_filename = existing_file(full_filename, temp_filename)
2662 info_dict['__real_download'] = False
2663
2664 _protocols = set(determine_protocol(f) for f in requested_formats)
2665 if len(_protocols) == 1:
2666 info_dict['protocol'] = _protocols.pop()
2667 directly_mergable = (
2668 'no-direct-merge' not in self.params.get('compat_opts', [])
2669 and info_dict.get('protocol') is not None # All requested formats have same protocol
2670 and not self.params.get('allow_unplayable_formats')
2671 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2672 if directly_mergable:
2673 info_dict['url'] = requested_formats[0]['url']
2674 # Treat it as a single download
2675 dl_filename = existing_file(full_filename, temp_filename)
2676 if dl_filename is None:
2677 success, real_download = self.dl(temp_filename, info_dict)
2678 info_dict['__real_download'] = real_download
2679 else:
2680 downloaded = []
2681 merger = FFmpegMergerPP(self)
2682 if self.params.get('allow_unplayable_formats'):
2683 self.report_warning(
2684 'You have requested merging of multiple formats '
2685 'while also allowing unplayable formats to be downloaded. '
2686 'The formats won\'t be merged to prevent data corruption.')
2687 elif not merger.available:
2688 self.report_warning(
2689 'You have requested merging of multiple formats but ffmpeg is not installed. '
2690 'The formats won\'t be merged.')
2691
2692 if dl_filename is None:
2693 for f in requested_formats:
2694 new_info = dict(info_dict)
2695 del new_info['requested_formats']
2696 new_info.update(f)
2697 fname = prepend_extension(
2698 self.prepare_filename(new_info, 'temp'),
2699 'f%s' % f['format_id'], new_info['ext'])
2700 if not self._ensure_dir_exists(fname):
2701 return
2702 downloaded.append(fname)
2703 partial_success, real_download = self.dl(fname, new_info)
2704 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2705 success = success and partial_success
2706 if merger.available and not self.params.get('allow_unplayable_formats'):
2707 info_dict['__postprocessors'].append(merger)
2708 info_dict['__files_to_merge'] = downloaded
2709 # Even if there were no downloads, it is being merged only now
2710 info_dict['__real_download'] = True
2711 else:
2712 for file in downloaded:
2713 files_to_move[file] = None
2714 else:
2715 # Just a single file
2716 dl_filename = existing_file(full_filename, temp_filename)
2717 if dl_filename is None:
2718 success, real_download = self.dl(temp_filename, info_dict)
2719 info_dict['__real_download'] = real_download
2720
2721 dl_filename = dl_filename or temp_filename
2722 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2723
2724 except network_exceptions as err:
2725 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2726 return
2727 except (OSError, IOError) as err:
2728 raise UnavailableVideoError(err)
2729 except (ContentTooShortError, ) as err:
2730 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2731 return
2732
2733 if success and full_filename != '-':
2734
2735 def fixup():
2736 do_fixup = True
2737 fixup_policy = self.params.get('fixup')
2738 vid = info_dict['id']
2739
2740 if fixup_policy in ('ignore', 'never'):
2741 return
2742 elif fixup_policy == 'warn':
2743 do_fixup = False
2744 elif fixup_policy != 'force':
2745 assert fixup_policy in ('detect_or_warn', None)
2746 if not info_dict.get('__real_download'):
2747 do_fixup = False
2748
2749 def ffmpeg_fixup(cndn, msg, cls):
2750 if not cndn:
2751 return
2752 if not do_fixup:
2753 self.report_warning(f'{vid}: {msg}')
2754 return
2755 pp = cls(self)
2756 if pp.available:
2757 info_dict['__postprocessors'].append(pp)
2758 else:
2759 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2760
2761 stretched_ratio = info_dict.get('stretched_ratio')
2762 ffmpeg_fixup(
2763 stretched_ratio not in (1, None),
2764 f'Non-uniform pixel ratio {stretched_ratio}',
2765 FFmpegFixupStretchedPP)
2766
2767 ffmpeg_fixup(
2768 (info_dict.get('requested_formats') is None
2769 and info_dict.get('container') == 'm4a_dash'
2770 and info_dict.get('ext') == 'm4a'),
2771 'writing DASH m4a. Only some players support this container',
2772 FFmpegFixupM4aPP)
2773
2774 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2775 if 'protocol' in info_dict else None)
2776 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2777 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2778 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2779
2780 fixup()
2781 try:
2782 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2783 except PostProcessingError as err:
2784 self.report_error('Postprocessing: %s' % str(err))
2785 return
2786 try:
2787 for ph in self._post_hooks:
2788 ph(info_dict['filepath'])
2789 except Exception as err:
2790 self.report_error('post hooks: %s' % str(err))
2791 return
2792 must_record_download_archive = True
2793
2794 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2795 self.record_download_archive(info_dict)
2796 max_downloads = self.params.get('max_downloads')
2797 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2798 raise MaxDownloadsReached()
2799
2800 def download(self, url_list):
2801 """Download a given list of URLs."""
2802 outtmpl = self.outtmpl_dict['default']
2803 if (len(url_list) > 1
2804 and outtmpl != '-'
2805 and '%' not in outtmpl
2806 and self.params.get('max_downloads') != 1):
2807 raise SameFileError(outtmpl)
2808
2809 for url in url_list:
2810 try:
2811 # It also downloads the videos
2812 res = self.extract_info(
2813 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2814 except UnavailableVideoError:
2815 self.report_error('unable to download video')
2816 except MaxDownloadsReached:
2817 self.to_screen('[info] Maximum number of downloaded files reached')
2818 raise
2819 except ExistingVideoReached:
2820 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2821 raise
2822 except RejectedVideoReached:
2823 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2824 raise
2825 else:
2826 if self.params.get('dump_single_json', False):
2827 self.post_extract(res)
2828 self.to_stdout(json.dumps(res, default=repr))
2829
2830 return self._download_retcode
2831
2832 def download_with_info_file(self, info_filename):
2833 with contextlib.closing(fileinput.FileInput(
2834 [info_filename], mode='r',
2835 openhook=fileinput.hook_encoded('utf-8'))) as f:
2836 # FileInput doesn't have a read method, we can't call json.load
2837 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2838 try:
2839 self.process_ie_result(info, download=True)
2840 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2841 webpage_url = info.get('webpage_url')
2842 if webpage_url is not None:
2843 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2844 return self.download([webpage_url])
2845 else:
2846 raise
2847 return self._download_retcode
2848
2849 @staticmethod
2850 def filter_requested_info(info_dict, actually_filter=True):
2851 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2852 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2853 if actually_filter:
2854 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2855 empty_values = (None, {}, [], set(), tuple())
2856 reject = lambda k, v: k not in keep_keys and (
2857 k.startswith('_') or k in remove_keys or v in empty_values)
2858 else:
2859 info_dict['epoch'] = int(time.time())
2860 reject = lambda k, v: k in remove_keys
2861 filter_fn = lambda obj: (
2862 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2863 else obj if not isinstance(obj, dict)
2864 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2865 return filter_fn(info_dict)
2866
2867 def run_pp(self, pp, infodict):
2868 files_to_delete = []
2869 if '__files_to_move' not in infodict:
2870 infodict['__files_to_move'] = {}
2871 files_to_delete, infodict = pp.run(infodict)
2872 if not files_to_delete:
2873 return infodict
2874
2875 if self.params.get('keepvideo', False):
2876 for f in files_to_delete:
2877 infodict['__files_to_move'].setdefault(f, '')
2878 else:
2879 for old_filename in set(files_to_delete):
2880 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2881 try:
2882 os.remove(encodeFilename(old_filename))
2883 except (IOError, OSError):
2884 self.report_warning('Unable to remove downloaded original file')
2885 if old_filename in infodict['__files_to_move']:
2886 del infodict['__files_to_move'][old_filename]
2887 return infodict
2888
2889 @staticmethod
2890 def post_extract(info_dict):
2891 def actual_post_extract(info_dict):
2892 if info_dict.get('_type') in ('playlist', 'multi_video'):
2893 for video_dict in info_dict.get('entries', {}):
2894 actual_post_extract(video_dict or {})
2895 return
2896
2897 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2898 extra = post_extractor().items()
2899 info_dict.update(extra)
2900 info_dict.pop('__post_extractor', None)
2901
2902 original_infodict = info_dict.get('__original_infodict') or {}
2903 original_infodict.update(extra)
2904 original_infodict.pop('__post_extractor', None)
2905
2906 actual_post_extract(info_dict or {})
2907
2908 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2909 info = dict(ie_info)
2910 info['__files_to_move'] = files_to_move or {}
2911 for pp in self._pps[key]:
2912 info = self.run_pp(pp, info)
2913 return info, info.pop('__files_to_move', None)
2914
2915 def post_process(self, filename, ie_info, files_to_move=None):
2916 """Run all the postprocessors on the given file."""
2917 info = dict(ie_info)
2918 info['filepath'] = filename
2919 info['__files_to_move'] = files_to_move or {}
2920
2921 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2922 info = self.run_pp(pp, info)
2923 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2924 del info['__files_to_move']
2925 for pp in self._pps['after_move']:
2926 info = self.run_pp(pp, info)
2927 return info
2928
2929 def _make_archive_id(self, info_dict):
2930 video_id = info_dict.get('id')
2931 if not video_id:
2932 return
2933 # Future-proof against any change in case
2934 # and backwards compatibility with prior versions
2935 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2936 if extractor is None:
2937 url = str_or_none(info_dict.get('url'))
2938 if not url:
2939 return
2940 # Try to find matching extractor for the URL and take its ie_key
2941 for ie in self._ies:
2942 if ie.suitable(url):
2943 extractor = ie.ie_key()
2944 break
2945 else:
2946 return
2947 return '%s %s' % (extractor.lower(), video_id)
2948
2949 def in_download_archive(self, info_dict):
2950 fn = self.params.get('download_archive')
2951 if fn is None:
2952 return False
2953
2954 vid_id = self._make_archive_id(info_dict)
2955 if not vid_id:
2956 return False # Incomplete video information
2957
2958 return vid_id in self.archive
2959
2960 def record_download_archive(self, info_dict):
2961 fn = self.params.get('download_archive')
2962 if fn is None:
2963 return
2964 vid_id = self._make_archive_id(info_dict)
2965 assert vid_id
2966 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2967 archive_file.write(vid_id + '\n')
2968 self.archive.add(vid_id)
2969
2970 @staticmethod
2971 def format_resolution(format, default='unknown'):
2972 if format.get('vcodec') == 'none':
2973 if format.get('acodec') == 'none':
2974 return 'images'
2975 return 'audio only'
2976 if format.get('resolution') is not None:
2977 return format['resolution']
2978 if format.get('width') and format.get('height'):
2979 res = '%dx%d' % (format['width'], format['height'])
2980 elif format.get('height'):
2981 res = '%sp' % format['height']
2982 elif format.get('width'):
2983 res = '%dx?' % format['width']
2984 else:
2985 res = default
2986 return res
2987
2988 def _format_note(self, fdict):
2989 res = ''
2990 if fdict.get('ext') in ['f4f', 'f4m']:
2991 res += '(unsupported) '
2992 if fdict.get('language'):
2993 if res:
2994 res += ' '
2995 res += '[%s] ' % fdict['language']
2996 if fdict.get('format_note') is not None:
2997 res += fdict['format_note'] + ' '
2998 if fdict.get('tbr') is not None:
2999 res += '%4dk ' % fdict['tbr']
3000 if fdict.get('container') is not None:
3001 if res:
3002 res += ', '
3003 res += '%s container' % fdict['container']
3004 if (fdict.get('vcodec') is not None
3005 and fdict.get('vcodec') != 'none'):
3006 if res:
3007 res += ', '
3008 res += fdict['vcodec']
3009 if fdict.get('vbr') is not None:
3010 res += '@'
3011 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3012 res += 'video@'
3013 if fdict.get('vbr') is not None:
3014 res += '%4dk' % fdict['vbr']
3015 if fdict.get('fps') is not None:
3016 if res:
3017 res += ', '
3018 res += '%sfps' % fdict['fps']
3019 if fdict.get('acodec') is not None:
3020 if res:
3021 res += ', '
3022 if fdict['acodec'] == 'none':
3023 res += 'video only'
3024 else:
3025 res += '%-5s' % fdict['acodec']
3026 elif fdict.get('abr') is not None:
3027 if res:
3028 res += ', '
3029 res += 'audio'
3030 if fdict.get('abr') is not None:
3031 res += '@%3dk' % fdict['abr']
3032 if fdict.get('asr') is not None:
3033 res += ' (%5dHz)' % fdict['asr']
3034 if fdict.get('filesize') is not None:
3035 if res:
3036 res += ', '
3037 res += format_bytes(fdict['filesize'])
3038 elif fdict.get('filesize_approx') is not None:
3039 if res:
3040 res += ', '
3041 res += '~' + format_bytes(fdict['filesize_approx'])
3042 return res
3043
    def list_formats(self, info_dict):
        """Print the available formats for a video as a table to stdout.

        Uses the multi-column yt-dlp layout unless the 'list-formats'
        compat-opt or listformats_table=False selects the legacy
        youtube-dl style. Formats with preference below -1000 are hidden.
        """
        formats = info_dict.get('formats', [info_dict])
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('listformats_table', True) is not False)
        if new_format:
            # One row per format: id/ext/res/fps | size/tbr/proto | codecs/bitrates | extra notes
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    ', '.join(filter(None, (
                        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                        format_field(f, 'asr', '%5dHz')))),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
        else:
            # Legacy 4-column layout with the note built by _format_note
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3090
3091 def list_thumbnails(self, info_dict):
3092 thumbnails = list(info_dict.get('thumbnails'))
3093 if not thumbnails:
3094 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3095 return
3096
3097 self.to_screen(
3098 '[info] Thumbnails for %s:' % info_dict['id'])
3099 self.to_stdout(render_table(
3100 ['ID', 'width', 'height', 'URL'],
3101 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3102
3103 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3104 if not subtitles:
3105 self.to_screen('%s has no %s' % (video_id, name))
3106 return
3107 self.to_screen(
3108 'Available %s for %s:' % (name, video_id))
3109
3110 def _row(lang, formats):
3111 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3112 if len(set(names)) == 1:
3113 names = [] if names[0] == 'unknown' else names[:1]
3114 return [lang, ', '.join(names), ', '.join(exts)]
3115
3116 self.to_stdout(render_table(
3117 ['Language', 'Name', 'Formats'],
3118 [_row(lang, formats) for lang, formats in subtitles.items()],
3119 hideEmpty=True))
3120
3121 def urlopen(self, req):
3122 """ Start an HTTP download """
3123 if isinstance(req, compat_basestring):
3124 req = sanitized_Request(req)
3125 return self._opener.open(req, timeout=self._socket_timeout)
3126
    def print_debug_header(self):
        """Write verbose debug information (versions, encodings, proxies) when --verbose is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How yt-dlp was installed: frozen exe, zip bundle, or plain source checkout
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only; a no-op (NameError swallowed) on Python 3
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # e.g. 'CPython', or 'PyPy version x.y.z' with the PyPy version appended
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # --call-home: report the public IP address
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): the code below appears intentionally unreachable —
            # the youtube-dl update check kept from upstream, disabled by the
            # `return` above; confirm before removing
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3216
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, TLS and custom handlers) used for all HTTP requests.

        Sets self.cookiejar, self._socket_timeout and self._opener as side effects.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # --proxy "" explicitly disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No --proxy given: fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3264
3265 def encode(self, s):
3266 if isinstance(s, bytes):
3267 return s # Already encoded
3268
3269 try:
3270 return s.encode(self.get_encoding())
3271 except UnicodeEncodeError as err:
3272 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3273 raise
3274
3275 def get_encoding(self):
3276 encoding = self.params.get('encoding')
3277 if encoding is None:
3278 encoding = preferredencoding()
3279 return encoding
3280
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download the video thumbnail(s) next to *filename*.

        Returns the list of extensions (with the thumbnail-id suffix when
        several are written) that were written or already present.
        """
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Only disambiguate filenames by thumbnail id when writing several
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # Iterates the list in reverse — presumably highest-preference first; confirm ordering upstream
        for t in thumbnails[::-1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                # Existing file counts as success without re-downloading
                ret.append(suffix + thumb_ext)
                t['filepath'] = thumb_filename
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Thumbnail failures are non-fatal; warn and move on
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            # Unless --write-all-thumbnails, stop after the first one that succeeds
            if ret and not write_all:
                break
        return ret