]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
Revert "[build] Build Windows x86 version with py3.8"
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import time
24 import tokenize
25 import traceback
26 import random
27
28 from string import ascii_letters
29 from zipimport import zipimporter
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_str,
39 compat_tokenize_tokenize,
40 compat_urllib_error,
41 compat_urllib_request,
42 compat_urllib_request_DataHandler,
43 )
44 from .utils import (
45 age_restricted,
46 args_to_str,
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
50 DEFAULT_OUTTMPL,
51 determine_ext,
52 determine_protocol,
53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
56 DownloadError,
57 encode_compat_str,
58 encodeFilename,
59 EntryNotInPlaylist,
60 error_to_compat_str,
61 ExistingVideoReached,
62 expand_path,
63 ExtractorError,
64 float_or_none,
65 format_bytes,
66 format_field,
67 STR_FORMAT_RE,
68 formatSeconds,
69 GeoRestrictedError,
70 int_or_none,
71 iri_to_uri,
72 ISO3166Utils,
73 LazyList,
74 locked_file,
75 make_dir,
76 make_HTTPS_handler,
77 MaxDownloadsReached,
78 network_exceptions,
79 orderedSet,
80 OUTTMPL_TYPES,
81 PagedList,
82 parse_filesize,
83 PerRequestProxyHandler,
84 platform_name,
85 PostProcessingError,
86 preferredencoding,
87 prepend_extension,
88 process_communicate_or_kill,
89 random_uuidv4,
90 register_socks_protocols,
91 RejectedVideoReached,
92 render_table,
93 replace_extension,
94 SameFileError,
95 sanitize_filename,
96 sanitize_path,
97 sanitize_url,
98 sanitized_Request,
99 std_headers,
100 str_or_none,
101 strftime_or_none,
102 subtitles_filename,
103 to_high_limit_path,
104 traverse_obj,
105 UnavailableVideoError,
106 url_basename,
107 version_tuple,
108 write_json_file,
109 write_string,
110 YoutubeDLCookieJar,
111 YoutubeDLCookieProcessor,
112 YoutubeDLHandler,
113 YoutubeDLRedirectHandler,
114 )
115 from .cache import Cache
116 from .extractor import (
117 gen_extractor_classes,
118 get_info_extractor,
119 _LAZY_LOADER,
120 _PLUGIN_CLASSES
121 )
122 from .extractor.openload import PhantomJSwrapper
123 from .downloader import (
124 get_suitable_downloader,
125 shorten_protocol_name
126 )
127 from .downloader.rtmp import rtmpdump_version
128 from .postprocessor import (
129 FFmpegFixupM3u8PP,
130 FFmpegFixupM4aPP,
131 FFmpegFixupStretchedPP,
132 FFmpegMergerPP,
133 FFmpegPostProcessor,
134 # FFmpegSubtitlesConvertorPP,
135 get_postprocessor,
136 MoveFilesAfterDownloadPP,
137 )
138 from .version import __version__
139
140 if compat_os_name == 'nt':
141 import ctypes
142
143
144 class YoutubeDL(object):
145 """YoutubeDL class.
146
147    YoutubeDL objects are the ones responsible for downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
153
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object handles it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL process the extracted information, possibly using a File
160 Downloader to download the video.
161
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
168
169 Available options:
170
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
173 videopassword: Password for accessing a video.
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
180 no_warnings: Do not print out anything for warnings.
181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
189 forcejson: Force printing info_dict as JSON.
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
194 simulate: Do not download the video files.
195 format: Video format code. see "FORMAT SELECTION" for more details.
196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
197    ignore_no_formats_error: Ignore "No video formats" error. Useful for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
200 format_sort: How to sort the video formats. see "Sorting Formats"
201 for more details.
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
203 for more details.
204 allow_multiple_video_streams: Allow multiple video streams to be merged
205 into a single file
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
207 into a single file
208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
210 outtmpl: Dictionary of templates for output names. Allowed keys
211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
212                        A string is also accepted for backward compatibility
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
216 windowsfilenames: Force the filenames to be windows compatible
217 ignoreerrors: Do not stop on download errors
218 (Default True when running yt-dlp,
219 but False when directly accessing YoutubeDL class)
220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
222 force_generic_extractor: Force downloader to use the generic extractor
223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
228 playlist_items: Specific indices of playlist to download.
229 playlistreverse: Download playlist items in reverse order.
230 playlistrandom: Download playlist items in random order.
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
233 logger: Log messages to a logging.Logger instance.
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video description to a .info.json file
237 clean_infojson: Remove private fields from the infojson
238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
240 writeannotations: Write the video annotations to a .annotations.xml file
241 writethumbnail: Write the thumbnail image to a file
242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
244 write_all_thumbnails: Write all thumbnail formats to files
245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
250 writesubtitles: Write the video subtitles to a file
251 writeautomaticsub: Write the automatically generated subtitles to a file
252 allsubtitles: Deprecated - Use subtitlelangs = ['all']
253 Downloads all the subtitles of the video
254 (requires writesubtitles or writeautomaticsub)
255 listsubtitles: Lists all available subtitles for the video
256 subtitlesformat: The format code for subtitles
257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
264 cachedir: Location of the cache files in the filesystem.
265 False to disable filesystem cache.
266 noplaylist: Download single video instead of a playlist if in doubt.
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
275 downloaded.
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
279 Videos already present in the file are not downloaded
280 again.
281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
286 nocheckcertificate:Do not verify SSL certificates
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
289 proxy: URL of the proxy server to use
290 geo_verification_proxy: URL of the proxy to use for IP address verification
291 on geo-restricted sites.
292 socket_timeout: Time to wait for unresponsive hosts, in seconds
293 bidi_workaround: Work around buggy terminals without bidirectional text
294                        support, using fribidi
295 debug_printtraffic:Print out sent and received HTTP traffic
296 include_ads: Download ads as well
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
299 encoding: Use this encoding instead of the system-specified.
300 extract_flat: Do not resolve URLs, return the immediate result.
301 Pass in 'in_playlist' to only show this behavior for
302 playlist items.
303 postprocessors: A list of dictionaries, each with an entry
304 * key: The name of the postprocessor. See
305 yt_dlp/postprocessor/__init__.py for a list.
306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
314 * status: One of "downloading", "error", or "finished".
315 Check this first and ignore unknown values.
316
317 If status is one of "downloading", or "finished", the
318 following properties may also be present:
319 * filename: The final filename (always present)
320 * tmpfilename: The filename we're currently writing to
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
323 * total_bytes_estimate: Guess of the eventual file size,
324 None if unavailable.
325 * elapsed: The number of seconds since download started.
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
328 unknown
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
333
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
336 merge_output_format: Extension to use when merging formats.
337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
340 fixup: Automatically correct known faults of the file.
341 One of:
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
345 about it, warn otherwise (default)
346 source_address: Client-side IP address to bind to.
347 call_home: Boolean, true iff we are allowed to contact the
348 yt-dlp servers for debugging. (BROKEN)
349 sleep_interval_requests: Number of seconds to sleep between requests
350 during extraction
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
355 max_sleep_interval.
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
364 match_filter: A function that gets called with the info_dict of
365 every video.
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
369 no_color: Do not emit color codes in output.
370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
371 HTTP header
372 geo_bypass_country:
373 Two-letter ISO 3166-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
375 X-Forwarded-For HTTP header
376 geo_bypass_ip_block:
377 IP range in CIDR notation that will be used similarly to
378 geo_bypass_country
379
380 The following options determine which downloader is picked:
381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
390 compat_opts: Compatibility options. See "Differences in default behavior".
391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
394 and no-youtube-unavailable-videos works when used via the API
395
396 The following parameters are not used by YoutubeDL itself, they are used by
397 the downloader (see yt_dlp/downloader/common.py):
398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
401 http_chunk_size.
402
403 The following options are used by the post processors:
404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
405 otherwise prefer ffmpeg. (avconv support is deprecated)
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to passed to all PP
413
414 The following options are used by the extractors:
415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
417 hls_split_discontinuity: Split HLS playlists to different formats at
418 discontinuities such as ad breaks (default: False)
419 youtube_include_dash_manifest: If True (default), DASH manifests and related
420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
423 youtube_include_hls_manifest: If True (default), HLS manifests and related
424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
427 """
428
429 _NUMERIC_FIELDS = set((
430 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
431 'timestamp', 'upload_year', 'upload_month', 'upload_day',
432 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
433 'average_rating', 'comment_count', 'age_limit',
434 'start_time', 'end_time',
435 'chapter_number', 'season_number', 'episode_number',
436 'track_number', 'disc_number', 'release_year',
437 'playlist_index',
438 ))
439
440 params = None
441 _ies = []
442 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
443 __prepare_filename_warned = False
444 _first_webpage_request = True
445 _download_retcode = None
446 _num_downloads = None
447 _playlist_level = 0
448 _playlist_urls = set()
449 _screen_file = None
450
451 def __init__(self, params=None, auto_init=True):
452 """Create a FileDownloader object with the given options."""
453 if params is None:
454 params = {}
455 self._ies = []
456 self._ies_instances = {}
457 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
458 self.__prepare_filename_warned = False
459 self._first_webpage_request = True
460 self._post_hooks = []
461 self._progress_hooks = []
462 self._download_retcode = 0
463 self._num_downloads = 0
464 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
465 self._err_file = sys.stderr
466 self.params = {
467 # Default parameters
468 'nocheckcertificate': False,
469 }
470 self.params.update(params)
471 self.cache = Cache(self)
472
473 if sys.version_info < (3, 6):
474 self.report_warning(
475 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
476 'Update to Python 3.6 or above' % sys.version_info[:2])
477
478 def check_deprecated(param, option, suggestion):
479 if self.params.get(param) is not None:
480 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
481 return True
482 return False
483
484 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
485 if self.params.get('geo_verification_proxy') is None:
486 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
487
488 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
489 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
490 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
491
492 for msg in self.params.get('warnings', []):
493 self.report_warning(msg)
494
495 if self.params.get('final_ext'):
496 if self.params.get('merge_output_format'):
497 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
498 self.params['merge_output_format'] = self.params['final_ext']
499
500 if 'overwrites' in self.params and self.params['overwrites'] is None:
501 del self.params['overwrites']
502
503 if params.get('bidi_workaround', False):
504 try:
505 import pty
506 master, slave = pty.openpty()
507 width = compat_get_terminal_size().columns
508 if width is None:
509 width_args = []
510 else:
511 width_args = ['-w', str(width)]
512 sp_kwargs = dict(
513 stdin=subprocess.PIPE,
514 stdout=slave,
515 stderr=self._err_file)
516 try:
517 self._output_process = subprocess.Popen(
518 ['bidiv'] + width_args, **sp_kwargs
519 )
520 except OSError:
521 self._output_process = subprocess.Popen(
522 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
523 self._output_channel = os.fdopen(master, 'rb')
524 except OSError as ose:
525 if ose.errno == errno.ENOENT:
526 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
527 else:
528 raise
529
530 if (sys.platform != 'win32'
531 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
532 and not params.get('restrictfilenames', False)):
533 # Unicode filesystem API will throw errors (#1474, #13027)
534 self.report_warning(
535 'Assuming --restrict-filenames since file system encoding '
536 'cannot encode all characters. '
537 'Set the LC_ALL environment variable to fix this.')
538 self.params['restrictfilenames'] = True
539
540 self.outtmpl_dict = self.parse_outtmpl()
541
542 self._setup_opener()
543
544 """Preload the archive, if any is specified"""
545 def preload_download_archive(fn):
546 if fn is None:
547 return False
548 self.write_debug('Loading archive file %r\n' % fn)
549 try:
550 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
551 for line in archive_file:
552 self.archive.add(line.strip())
553 except IOError as ioe:
554 if ioe.errno != errno.ENOENT:
555 raise
556 return False
557 return True
558
559 self.archive = set()
560 preload_download_archive(self.params.get('download_archive'))
561
562 if auto_init:
563 self.print_debug_header()
564 self.add_default_info_extractors()
565
566 for pp_def_raw in self.params.get('postprocessors', []):
567 pp_class = get_postprocessor(pp_def_raw['key'])
568 pp_def = dict(pp_def_raw)
569 del pp_def['key']
570 if 'when' in pp_def:
571 when = pp_def['when']
572 del pp_def['when']
573 else:
574 when = 'post_process'
575 pp = pp_class(self, **compat_kwargs(pp_def))
576 self.add_post_processor(pp, when=when)
577
578 for ph in self.params.get('post_hooks', []):
579 self.add_post_hook(ph)
580
581 for ph in self.params.get('progress_hooks', []):
582 self.add_progress_hook(ph)
583
584 register_socks_protocols()
585
586 def warn_if_short_id(self, argv):
587 # short YouTube ID starting with dash?
588 idxs = [
589 i for i, a in enumerate(argv)
590 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
591 if idxs:
592 correct_argv = (
593 ['yt-dlp']
594 + [a for i, a in enumerate(argv) if i not in idxs]
595 + ['--'] + [argv[i] for i in idxs]
596 )
597 self.report_warning(
598 'Long argument string detected. '
599 'Use -- to separate parameters and URLs, like this:\n%s\n' %
600 args_to_str(correct_argv))
601
602 def add_info_extractor(self, ie):
603 """Add an InfoExtractor object to the end of the list."""
604 self._ies.append(ie)
605 if not isinstance(ie, type):
606 self._ies_instances[ie.ie_key()] = ie
607 ie.set_downloader(self)
608
609 def get_info_extractor(self, ie_key):
610 """
611 Get an instance of an IE with name ie_key, it will try to get one from
612 the _ies list, if there's no instance it will create a new one and add
613 it to the extractor list.
614 """
615 ie = self._ies_instances.get(ie_key)
616 if ie is None:
617 ie = get_info_extractor(ie_key)()
618 self.add_info_extractor(ie)
619 return ie
620
621 def add_default_info_extractors(self):
622 """
623 Add the InfoExtractors returned by gen_extractors to the end of the list
624 """
625 for ie in gen_extractor_classes():
626 self.add_info_extractor(ie)
627
628 def add_post_processor(self, pp, when='post_process'):
629 """Add a PostProcessor object to the end of the chain."""
630 self._pps[when].append(pp)
631 pp.set_downloader(self)
632
633 def add_post_hook(self, ph):
634 """Add the post hook"""
635 self._post_hooks.append(ph)
636
637 def add_progress_hook(self, ph):
638 """Add the progress hook (currently only for the file downloader)"""
639 self._progress_hooks.append(ph)
640
641 def _bidi_workaround(self, message):
642 if not hasattr(self, '_output_channel'):
643 return message
644
645 assert hasattr(self, '_output_process')
646 assert isinstance(message, compat_str)
647 line_count = message.count('\n') + 1
648 self._output_process.stdin.write((message + '\n').encode('utf-8'))
649 self._output_process.stdin.flush()
650 res = ''.join(self._output_channel.readline().decode('utf-8')
651 for _ in range(line_count))
652 return res[:-len('\n')]
653
654 def _write_string(self, s, out=None):
655 write_string(s, out=out, encoding=self.params.get('encoding'))
656
657 def to_stdout(self, message, skip_eol=False, quiet=False):
658 """Print message to stdout"""
659 if self.params.get('logger'):
660 self.params['logger'].debug(message)
661 elif not quiet or self.params.get('verbose'):
662 self._write_string(
663 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
664 self._err_file if quiet else self._screen_file)
665
666 def to_stderr(self, message):
667 """Print message to stderr"""
668 assert isinstance(message, compat_str)
669 if self.params.get('logger'):
670 self.params['logger'].error(message)
671 else:
672 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
673
674 def to_console_title(self, message):
675 if not self.params.get('consoletitle', False):
676 return
677 if compat_os_name == 'nt':
678 if ctypes.windll.kernel32.GetConsoleWindow():
679 # c_wchar_p() might not be necessary if `message` is
680 # already of type unicode()
681 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
682 elif 'TERM' in os.environ:
683 self._write_string('\033]0;%s\007' % message, self._screen_file)
684
685 def save_console_title(self):
686 if not self.params.get('consoletitle', False):
687 return
688 if self.params.get('simulate', False):
689 return
690 if compat_os_name != 'nt' and 'TERM' in os.environ:
691 # Save the title on stack
692 self._write_string('\033[22;0t', self._screen_file)
693
694 def restore_console_title(self):
695 if not self.params.get('consoletitle', False):
696 return
697 if self.params.get('simulate', False):
698 return
699 if compat_os_name != 'nt' and 'TERM' in os.environ:
700 # Restore the title from stack
701 self._write_string('\033[23;0t', self._screen_file)
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
709
710 if self.params.get('cookiefile') is not None:
711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
712
713 def trouble(self, message=None, tb=None):
714 """Determine action to take when a download problem appears.
715
716 Depending on if the downloader has been configured to ignore
717 download errors or not, this method may throw an exception or
718 not when errors are found, after printing the message.
719
720 tb, if given, is additional traceback information.
721 """
722 if message is not None:
723 self.to_stderr(message)
724 if self.params.get('verbose'):
725 if tb is None:
726 if sys.exc_info()[0]: # if .trouble has been called from an except block
727 tb = ''
728 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
729 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
730 tb += encode_compat_str(traceback.format_exc())
731 else:
732 tb_data = traceback.format_list(traceback.extract_stack())
733 tb = ''.join(tb_data)
734 if tb:
735 self.to_stderr(tb)
736 if not self.params.get('ignoreerrors', False):
737 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
738 exc_info = sys.exc_info()[1].exc_info
739 else:
740 exc_info = sys.exc_info()
741 raise DownloadError(message, exc_info)
742 self._download_retcode = 1
743
744 def to_screen(self, message, skip_eol=False):
745 """Print message to stdout if not in quiet mode"""
746 self.to_stdout(
747 message, skip_eol, quiet=self.params.get('quiet', False))
748
749 def report_warning(self, message):
750 '''
751 Print the message to stderr, it will be prefixed with 'WARNING:'
752 If stderr is a tty file the 'WARNING:' will be colored
753 '''
754 if self.params.get('logger') is not None:
755 self.params['logger'].warning(message)
756 else:
757 if self.params.get('no_warnings'):
758 return
759 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
760 _msg_header = '\033[0;33mWARNING:\033[0m'
761 else:
762 _msg_header = 'WARNING:'
763 warning_message = '%s %s' % (_msg_header, message)
764 self.to_stderr(warning_message)
765
766 def report_error(self, message, tb=None):
767 '''
768 Do the same as trouble, but prefixes the message with 'ERROR:', colored
769 in red if stderr is a tty file.
770 '''
771 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
772 _msg_header = '\033[0;31mERROR:\033[0m'
773 else:
774 _msg_header = 'ERROR:'
775 error_message = '%s %s' % (_msg_header, message)
776 self.trouble(error_message, tb)
777
778 def write_debug(self, message):
779 '''Log debug message or Print message to stderr'''
780 if not self.params.get('verbose', False):
781 return
782 message = '[debug] %s' % message
783 if self.params.get('logger'):
784 self.params['logger'].debug(message)
785 else:
786 self._write_string('%s\n' % message)
787
788 def report_file_already_downloaded(self, file_name):
789 """Report file has already been fully downloaded."""
790 try:
791 self.to_screen('[download] %s has already been downloaded' % file_name)
792 except UnicodeEncodeError:
793 self.to_screen('[download] The file has already been downloaded')
794
795 def report_file_delete(self, file_name):
796 """Report that existing file will be deleted."""
797 try:
798 self.to_screen('Deleting existing file %s' % file_name)
799 except UnicodeEncodeError:
800 self.to_screen('Deleting existing file')
801
def parse_outtmpl(self):
    """Return the output-template dict from params, filling in any unset
    or empty keys from DEFAULT_OUTTMPL and warning about bytes templates."""
    templates = self.params.get('outtmpl', {})
    # A bare string is treated as the 'default' template
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for tmpl_key, tmpl_val in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_key):
            templates[tmpl_key] = tmpl_val
    for tmpl_val in templates.values():
        if isinstance(tmpl_val, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
815
@staticmethod
def validate_outtmpl(tmpl):
    ''' @return None or Exception object '''
    def escape_plain_percents(mobj):
        # Re-escape lone '%' so the trial substitution below does not
        # misread them as conversion specifiers
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    try:
        # Dry-run the template against a dict that answers 0 for any key;
        # a malformed conversion spec raises ValueError
        escaped = re.sub(STR_FORMAT_RE.format(''), escape_plain_percents, tmpl)
        escaped % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
828
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    Returns a tuple (outtmpl, tmpl_dict): the rewritten template and the dict
    to apply it to. Each `%(...)X` group in the template is replaced by a
    synthetic key (original key + NUL + format spec) so the same field can be
    used with different conversions in one template.

    sanitize, when given, is a callable (key, value) -> value applied to every
    substituted value.
    """
    # Work on a copy: derived fields are added below
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
    # Inside %(...)X: optional negation, dotted field, +/- maths chain,
    # '>' strftime format and '|' default value
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{0})
        (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(FIELD_RE))
    MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    tmpl_dict = {}

    # Resolve a dotted field path ('a.b.0') against info_dict
    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Evaluate one parsed %(...) expression to its raw value
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        if mdict['maths']:
            value = float_or_none(value)
            operator = None
            # split() yields alternating operators and operands; [0] is the
            # empty prefix before the first operator
            for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
                if item == '' or value is None:
                    return None
                if operator:
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand is itself a field name, not a literal number
                        offset = float_or_none(get_key(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
                else:
                    operator = MATH_FUNCTIONS[item]
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        # re.sub callback: rewrite one %(...)X group and record its value
        if not outer_mobj.group('has_key'):
            # A bare '%' (no key) — just re-escape it
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default = None, na
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            # Backward-compat zero padding (see field_size_compat_map above)
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value
        # NUL-joined key+fmt keeps entries distinct per conversion spec
        key += '\0%s' % fmt

        if fmt == 'c':
            value = compat_str(value)
            if value is None:
                value, fmt = default, 's'
            else:
                value = value[0]
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            value = sanitize(key, value)
        tmpl_dict[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
944
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Render the output template of the given type for info_dict.

    Returns the filename string, or None after reporting an error when the
    template substitution raises ValueError.
    """
    try:
        # ID-ish fields get the less aggressive is_id sanitization
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # Some template types force a particular extension (e.g. infojson)
        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # NOTE(review): rsplit('.') splits on EVERY dot, and only
            # fn_groups[0] is kept besides (sub_)ext — names with several
            # dots lose their middle segments here; looks intentional-ish
            # but worth confirming
            fn_groups = filename.rsplit('.')
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
985
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    Renders the template for dir_type (falling back to 'default') and joins
    it with the configured 'paths' (home dir + per-type subdir). Returns the
    rendered template unchanged when it is '-' (stdout) or falsy (template
    error). With warn=True, warns once when --paths is set but ignored.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            # Fixed grammar of the warning ('when an outputting')
            self.report_warning('--paths is ignored when outputting to stdout')
        # Guard against filename=None (template error) — os.path.isabs(None)
        # would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
1015
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns a human-readable reason string for skipping it.
    Raises ExistingVideoReached/RejectedVideoReached when the corresponding
    break_on_* option is set. With silent=True the reason is not printed.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Return a skip reason, or None if all filters pass
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # match_filter may need fields that are absent from incomplete
        # (e.g. flat-playlist) entries, so it is only applied to full ones
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    # Archive check takes precedence over the other filters
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1069
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
1075
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Return a list with a dictionary for each video extracted.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to determine the video id cheaply (without extraction) so the
        # archive can be checked before hitting the network
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            # break (not return) skips the for-else below; the archived
            # video is skipped and None is returned
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        # No extractor accepted the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1124
def __handle_extraction_exceptions(func):
    """Decorator for extraction entry points.

    Reports expected extraction failures (geo-restriction, extractor errors)
    via report_error instead of propagating them; re-raises the flow-control
    exceptions unchanged; and for anything else honours the 'ignoreerrors'
    option. Returns None when an exception was swallowed.
    """
    # Local import: the module's top-level import block is not editable here
    import functools

    @functools.wraps(func)  # preserve func's name/docstring for debugging
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            # These steer overall download flow and must reach the caller
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
1146
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run ie.extract(url), normalise legacy results and optionally process them."""
    result = ie.extract(url)
    if result is None:
        # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(result, list):
        # Backwards compatibility: old IE result format was a bare entry list
        result = {
            '_type': 'compat_list',
            'entries': result,
        }
    self.add_default_extra_info(result, ie, url)
    if not process:
        return result
    return self.process_ie_result(result, download, extra_info)
1163
def add_default_extra_info(self, ie_result, ie, url):
    """Attach extractor identity and URL bookkeeping fields to ie_result
    (existing keys are never overwritten)."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'original_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1172
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        # With --flat-playlist, playlist members are printed, not resolved
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            self.add_default_extra_info(
                info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Fixed precedence bug: '%' binds tighter than 'or', so the
            # original formatted a possibly-None title and the id fallback
            # was never substituted into the message
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Propagate the outer result's bookkeeping fields to each entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1293
def _ensure_dir_exists(self, path):
    """Create the directory for *path* if needed; failures are reported
    through report_error. Returns make_dir's result (falsy on failure)."""
    created = make_dir(path, self.report_error)
    return created
1296
def __process_playlist(self, ie_result, download):
    """Resolve and (optionally) download every entry of a playlist result.

    Applies playliststart/playlistend/playlist_items selection, writes the
    playlist-level metadata files when enabled, and processes each entry,
    aborting after 'skip_playlist_after_errors' consecutive-total failures.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Fixed: max(*indexes) raised TypeError for a single requested
            # index (max(5) is invalid); max(indexes) handles any length
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # Expand '1-3,7' style specs into individual indices
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                # Evaluate break_on_existing/break_on_reject eagerly
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist_index' in self.params.get('compat_options', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1468
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry; extraction errors are absorbed by the decorator."""
    return self.process_ie_result(entry, download=download, extra_info=extra_info)
1473
def _build_format_filter(self, filter_spec):
    " Returns a function to filter the formats according to the filter_spec "

    # Numeric comparisons on a fixed set of keys, e.g. 'height<=720'
    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    if m:
        try:
            comparison_value = int(m.group('value'))
        except ValueError:
            # Non-integer value: try to parse it as a size ('500K', '0.5MiB');
            # a trailing 'B' is also tolerated
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
        op = OPERATORS[m.group('op')]

    # Fall back to string comparisons, e.g. 'vcodec^=avc1' or 'format_id!=x'
    if not m:
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
        if m:
            comparison_value = m.group('value')
            str_op = STR_OPERATORS[m.group('op')]
            if m.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not m:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    def _filter(f):
        # A format missing the key passes only with the trailing '?'
        # (none-inclusive) marker in the spec
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    return _filter
1536
def _default_format_spec(self, info_dict, download=True):
    """Choose the default format selector string for this download.

    Falls back to pre-merged 'best' when merging is impossible (no usable
    ffmpeg, live stream, or output to stdout); otherwise picks the
    merge-based selector, in compat flavour when requested.
    """
    def merging_possible():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    prefer_best = (
        download
        and not self.params.get('simulate', False)
        and (not merging_possible()
             or info_dict.get('is_live', False)
             or self.outtmpl_dict['default'] == '-'))
    use_compat = (
        prefer_best
        or self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params.get('compat_opts', []))

    if prefer_best:
        return 'best/bestvideo+bestaudio'
    if use_compat:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
1559
1560 def build_format_selector(self, format_spec):
1561 def syntax_error(note, start):
1562 message = (
1563 'Invalid format specification: '
1564 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1565 return SyntaxError(message)
1566
1567 PICKFIRST = 'PICKFIRST'
1568 MERGE = 'MERGE'
1569 SINGLE = 'SINGLE'
1570 GROUP = 'GROUP'
1571 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1572
1573 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1574 'video': self.params.get('allow_multiple_video_streams', False)}
1575
1576 check_formats = self.params.get('check_formats')
1577
1578 def _parse_filter(tokens):
1579 filter_parts = []
1580 for type, string, start, _, _ in tokens:
1581 if type == tokenize.OP and string == ']':
1582 return ''.join(filter_parts)
1583 else:
1584 filter_parts.append(string)
1585
1586 def _remove_unused_ops(tokens):
1587 # Remove operators that we don't use and join them with the surrounding strings
1588 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1589 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1590 last_string, last_start, last_end, last_line = None, None, None, None
1591 for type, string, start, end, line in tokens:
1592 if type == tokenize.OP and string == '[':
1593 if last_string:
1594 yield tokenize.NAME, last_string, last_start, last_end, last_line
1595 last_string = None
1596 yield type, string, start, end, line
1597 # everything inside brackets will be handled by _parse_filter
1598 for type, string, start, end, line in tokens:
1599 yield type, string, start, end, line
1600 if type == tokenize.OP and string == ']':
1601 break
1602 elif type == tokenize.OP and string in ALLOWED_OPS:
1603 if last_string:
1604 yield tokenize.NAME, last_string, last_start, last_end, last_line
1605 last_string = None
1606 yield type, string, start, end, line
1607 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1608 if not last_string:
1609 last_string = string
1610 last_start = start
1611 last_end = end
1612 else:
1613 last_string += string
1614 if last_string:
1615 yield tokenize.NAME, last_string, last_start, last_end, last_line
1616
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Recursive-descent parse of a token stream into a list of
            FormatSelector trees.

            The inside_* flags tell a recursive invocation which construct it
            is parsing the interior of, i.e. which delimiter should terminate
            it (and be pushed back for the caller to consume).
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # a bare name/number is an atomic format selector
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # end of a merge operand; the outer call handles the operator
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # fallback: pick first of (current, rest-of-expression)
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]' attaches a filter to the current selector ('best' if none)
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # merge: current selector + the selector that follows
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
1674
1675 def _merge(formats_pair):
1676 format_1, format_2 = formats_pair
1677
1678 formats_info = []
1679 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1680 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1681
1682 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1683 get_no_more = {"video": False, "audio": False}
1684 for (i, fmt_info) in enumerate(formats_info):
1685 for aud_vid in ["audio", "video"]:
1686 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1687 if get_no_more[aud_vid]:
1688 formats_info.pop(i)
1689 get_no_more[aud_vid] = True
1690
1691 if len(formats_info) == 1:
1692 return formats_info[0]
1693
1694 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1695 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1696
1697 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1698 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1699
1700 output_ext = self.params.get('merge_output_format')
1701 if not output_ext:
1702 if the_only_video:
1703 output_ext = the_only_video['ext']
1704 elif the_only_audio and not video_fmts:
1705 output_ext = the_only_audio['ext']
1706 else:
1707 output_ext = 'mkv'
1708
1709 new_dict = {
1710 'requested_formats': formats_info,
1711 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1712 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1713 'ext': output_ext,
1714 }
1715
1716 if the_only_video:
1717 new_dict.update({
1718 'width': the_only_video.get('width'),
1719 'height': the_only_video.get('height'),
1720 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1721 'fps': the_only_video.get('fps'),
1722 'vcodec': the_only_video.get('vcodec'),
1723 'vbr': the_only_video.get('vbr'),
1724 'stretched_ratio': the_only_video.get('stretched_ratio'),
1725 })
1726
1727 if the_only_audio:
1728 new_dict.update({
1729 'acodec': the_only_audio.get('acodec'),
1730 'abr': the_only_audio.get('abr'),
1731 })
1732
1733 return new_dict
1734
1735 def _check_formats(formats):
1736 for f in formats:
1737 self.to_screen('[info] Testing format %s' % f['format_id'])
1738 paths = self.params.get('paths', {})
1739 temp_file = os.path.join(
1740 expand_path(paths.get('home', '').strip()),
1741 expand_path(paths.get('temp', '').strip()),
1742 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1743 try:
1744 dl, _ = self.dl(temp_file, f, test=True)
1745 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1746 dl = False
1747 finally:
1748 if os.path.exists(temp_file):
1749 os.remove(temp_file)
1750 if dl:
1751 yield f
1752 else:
1753 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1754
        def _build_selector_function(selector):
            """Compile a parsed FormatSelector tree (or a list of them) into a
            function mapping a context dict ({'formats', 'incomplete_formats'})
            to an iterable of the selected format dicts."""
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # concatenate the results of all comma-separated selectors
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # first alternative that yields anything wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if check_formats:
                            formats = _check_formats(formats)
                        for f in formats:
                            yield f
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # merge every working format into one, best-first
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # best/worst selectors like b, w, bv*, ba.2, wa etc.
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else None)  # b*, w*
                    else:
                        # otherwise the spec is a literal extension or a format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        if format_reverse:
                            matches = matches[::-1]
                        if check_formats:
                            matches = list(itertools.islice(_check_formats(matches), format_idx))
                        n = len(matches)
                        if -n <= format_idx - 1 < n:
                            yield matches[format_idx - 1]

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # cartesian product: merge every pair of selections
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # apply the selector's attached [..] filters on a private copy
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
1859
1860 stream = io.BytesIO(format_spec.encode('utf-8'))
1861 try:
1862 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1863 except tokenize.TokenError:
1864 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1865
1866 class TokenIterator(object):
1867 def __init__(self, tokens):
1868 self.tokens = tokens
1869 self.counter = 0
1870
1871 def __iter__(self):
1872 return self
1873
1874 def __next__(self):
1875 if self.counter >= len(self.tokens):
1876 raise StopIteration()
1877 value = self.tokens[self.counter]
1878 self.counter += 1
1879 return value
1880
1881 next = __next__
1882
1883 def restore_last_token(self):
1884 self.counter -= 1
1885
1886 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1887 return _build_selector_function(parsed_selector)
1888
1889 def _calc_headers(self, info_dict):
1890 res = std_headers.copy()
1891
1892 add_headers = info_dict.get('http_headers')
1893 if add_headers:
1894 res.update(add_headers)
1895
1896 cookies = self._calc_cookies(info_dict)
1897 if cookies:
1898 res['Cookie'] = cookies
1899
1900 if 'X-Forwarded-For' not in res:
1901 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1902 if x_forwarded_for_ip:
1903 res['X-Forwarded-For'] = x_forwarded_for_ip
1904
1905 return res
1906
1907 def _calc_cookies(self, info_dict):
1908 pr = sanitized_Request(info_dict['url'])
1909 self.cookiejar.add_cookie_header(pr)
1910 return pr.get_header('Cookie')
1911
1912 @staticmethod
1913 def _sanitize_thumbnails(info_dict):
1914 thumbnails = info_dict.get('thumbnails')
1915 if thumbnails is None:
1916 thumbnail = info_dict.get('thumbnail')
1917 if thumbnail:
1918 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1919 if thumbnails:
1920 thumbnails.sort(key=lambda t: (
1921 t.get('preference') if t.get('preference') is not None else -1,
1922 t.get('width') if t.get('width') is not None else -1,
1923 t.get('height') if t.get('height') is not None else -1,
1924 t.get('id') if t.get('id') is not None else '',
1925 t.get('url')))
1926 for i, t in enumerate(thumbnails):
1927 t['url'] = sanitize_url(t['url'])
1928 if t.get('width') and t.get('height'):
1929 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1930 if t.get('id') is None:
1931 t['id'] = '%d' % i
1932
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, select the requested
        subtitles and formats and, when *download* is True, hand each
        selected format over to process_info().  Returns the (mutated)
        info_dict, updated with the last selected format for backward
        compatibility."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # warn that an extractor returned a wrongly-typed field
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # coerce a non-string field to str (warning about the extractor bug)
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # coerce all known numeric fields to int (warning about extractor bugs)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails were sorted ascending, so the last one is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the *_date fields from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize URLs and fill in missing extensions of all subtitle formats
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # a format must at least carry a non-empty URL to be usable
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('listformats'):
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2171
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Returns a dict mapping language -> chosen subtitle format dict, or
        None when subtitle writing is disabled or nothing is available.
        """
        available_subs = {}
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                # normal subtitles take precedence over automatic captions
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        all_sub_langs = available_subs.keys()
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            requested_langs = set()
            for lang in self.params.get('subtitleslangs'):
                if lang == 'all':
                    requested_langs.update(all_sub_langs)
                    continue
                # a leading '-' removes matching languages from the selection
                discard = lang[0] == '-'
                if discard:
                    lang = lang[1:]
                # the language spec is treated as a regex anchored at the end
                current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
                if discard:
                    for lang in current_langs:
                        requested_langs.discard(lang)
                else:
                    requested_langs.update(current_langs)
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            requested_langs = [list(all_sub_langs)[0]]
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            # pick the first preferred extension that is available
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                # no preferred extension matched - fall back to the last format
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
2234
2235 def __forced_printings(self, info_dict, filename, incomplete):
2236 def print_mandatory(field, actual_field=None):
2237 if actual_field is None:
2238 actual_field = field
2239 if (self.params.get('force%s' % field, False)
2240 and (not incomplete or info_dict.get(actual_field) is not None)):
2241 self.to_stdout(info_dict[actual_field])
2242
2243 def print_optional(field):
2244 if (self.params.get('force%s' % field, False)
2245 and info_dict.get(field) is not None):
2246 self.to_stdout(info_dict[field])
2247
2248 info_dict = info_dict.copy()
2249 if filename is not None:
2250 info_dict['filename'] = filename
2251 if info_dict.get('requested_formats') is not None:
2252 # For RTMP URLs, also include the playpath
2253 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2254 elif 'url' in info_dict:
2255 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2256
2257 for tmpl in self.params.get('forceprint', []):
2258 if re.match(r'\w+$', tmpl):
2259 tmpl = '%({})s'.format(tmpl)
2260 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2261 self.to_stdout(tmpl % info_copy)
2262
2263 print_mandatory('title')
2264 print_mandatory('id')
2265 print_mandatory('url', 'urls')
2266 print_optional('thumbnail')
2267 print_optional('description')
2268 print_optional('filename')
2269 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2270 self.to_stdout(formatSeconds(info_dict['duration']))
2271 print_mandatory('format')
2272
2273 if self.params.get('forcejson', False):
2274 self.post_extract(info_dict)
2275 self.to_stdout(json.dumps(info_dict, default=repr))
2276
2277 def dl(self, name, info, subtitle=False, test=False):
2278
2279 if test:
2280 verbose = self.params.get('verbose')
2281 params = {
2282 'test': True,
2283 'quiet': not verbose,
2284 'verbose': verbose,
2285 'noprogress': not verbose,
2286 'nopart': True,
2287 'skip_unavailable_fragments': False,
2288 'keep_fragments': False,
2289 'overwrites': True,
2290 '_no_ytdl_file': True,
2291 }
2292 else:
2293 params = self.params
2294 fd = get_suitable_downloader(info, params)(self, params)
2295 if not test:
2296 for ph in self._progress_hooks:
2297 fd.add_progress_hook(ph)
2298 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2299 self.write_debug('Invoking downloader on "%s"' % urls)
2300 new_info = dict(info)
2301 if new_info.get('http_headers') is None:
2302 new_info['http_headers'] = self._calc_headers(new_info)
2303 return fd.download(name, new_info, subtitle)
2304
2305 def process_info(self, info_dict):
2306 """Process a single resolved IE result."""
2307
2308 assert info_dict.get('_type', 'video') == 'video'
2309
2310 info_dict.setdefault('__postprocessors', [])
2311
2312 max_downloads = self.params.get('max_downloads')
2313 if max_downloads is not None:
2314 if self._num_downloads >= int(max_downloads):
2315 raise MaxDownloadsReached()
2316
2317 # TODO: backward compatibility, to be removed
2318 info_dict['fulltitle'] = info_dict['title']
2319
2320 if 'format' not in info_dict:
2321 info_dict['format'] = info_dict['ext']
2322
2323 if self._match_entry(info_dict) is not None:
2324 return
2325
2326 self.post_extract(info_dict)
2327 self._num_downloads += 1
2328
2329 # info_dict['_filename'] needs to be set for backward compatibility
2330 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2331 temp_filename = self.prepare_filename(info_dict, 'temp')
2332 files_to_move = {}
2333
2334 # Forced printings
2335 self.__forced_printings(info_dict, full_filename, incomplete=False)
2336
2337 if self.params.get('simulate', False):
2338 if self.params.get('force_write_download_archive', False):
2339 self.record_download_archive(info_dict)
2340
2341 # Do nothing else if in simulate mode
2342 return
2343
2344 if full_filename is None:
2345 return
2346
2347 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2348 return
2349 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2350 return
2351
2352 if self.params.get('writedescription', False):
2353 descfn = self.prepare_filename(info_dict, 'description')
2354 if not self._ensure_dir_exists(encodeFilename(descfn)):
2355 return
2356 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2357 self.to_screen('[info] Video description is already present')
2358 elif info_dict.get('description') is None:
2359 self.report_warning('There\'s no description to write.')
2360 else:
2361 try:
2362 self.to_screen('[info] Writing video description to: ' + descfn)
2363 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2364 descfile.write(info_dict['description'])
2365 except (OSError, IOError):
2366 self.report_error('Cannot write description file ' + descfn)
2367 return
2368
2369 if self.params.get('writeannotations', False):
2370 annofn = self.prepare_filename(info_dict, 'annotation')
2371 if not self._ensure_dir_exists(encodeFilename(annofn)):
2372 return
2373 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2374 self.to_screen('[info] Video annotations are already present')
2375 elif not info_dict.get('annotations'):
2376 self.report_warning('There are no annotations to write.')
2377 else:
2378 try:
2379 self.to_screen('[info] Writing video annotations to: ' + annofn)
2380 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2381 annofile.write(info_dict['annotations'])
2382 except (KeyError, TypeError):
2383 self.report_warning('There are no annotations to write.')
2384 except (OSError, IOError):
2385 self.report_error('Cannot write annotations file: ' + annofn)
2386 return
2387
2388 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2389 self.params.get('writeautomaticsub')])
2390
2391 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2392 # subtitles download errors are already managed as troubles in relevant IE
2393 # that way it will silently go on when used with unsupporting IE
2394 subtitles = info_dict['requested_subtitles']
2395 # ie = self.get_info_extractor(info_dict['extractor_key'])
2396 for sub_lang, sub_info in subtitles.items():
2397 sub_format = sub_info['ext']
2398 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2399 sub_filename_final = subtitles_filename(
2400 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2401 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2402 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2403 sub_info['filepath'] = sub_filename
2404 files_to_move[sub_filename] = sub_filename_final
2405 else:
2406 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2407 if sub_info.get('data') is not None:
2408 try:
2409 # Use newline='' to prevent conversion of newline characters
2410 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2411 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2412 subfile.write(sub_info['data'])
2413 sub_info['filepath'] = sub_filename
2414 files_to_move[sub_filename] = sub_filename_final
2415 except (OSError, IOError):
2416 self.report_error('Cannot write subtitles file ' + sub_filename)
2417 return
2418 else:
2419 try:
2420 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2421 sub_info['filepath'] = sub_filename
2422 files_to_move[sub_filename] = sub_filename_final
2423 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2424 self.report_warning('Unable to download subtitle for "%s": %s' %
2425 (sub_lang, error_to_compat_str(err)))
2426 continue
2427
2428 if self.params.get('writeinfojson', False):
2429 infofn = self.prepare_filename(info_dict, 'infojson')
2430 if not self._ensure_dir_exists(encodeFilename(infofn)):
2431 return
2432 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2433 self.to_screen('[info] Video metadata is already present')
2434 else:
2435 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2436 try:
2437 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2438 except (OSError, IOError):
2439 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2440 return
2441 info_dict['__infojson_filename'] = infofn
2442
2443 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2444 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2445 thumb_filename = replace_extension(
2446 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2447 files_to_move[thumb_filename_temp] = thumb_filename
2448
2449 # Write internet shortcut files
2450 url_link = webloc_link = desktop_link = False
2451 if self.params.get('writelink', False):
2452 if sys.platform == "darwin": # macOS.
2453 webloc_link = True
2454 elif sys.platform.startswith("linux"):
2455 desktop_link = True
2456 else: # if sys.platform in ['win32', 'cygwin']:
2457 url_link = True
2458 if self.params.get('writeurllink', False):
2459 url_link = True
2460 if self.params.get('writewebloclink', False):
2461 webloc_link = True
2462 if self.params.get('writedesktoplink', False):
2463 desktop_link = True
2464
2465 if url_link or webloc_link or desktop_link:
2466 if 'webpage_url' not in info_dict:
2467 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2468 return
2469 ascii_url = iri_to_uri(info_dict['webpage_url'])
2470
2471 def _write_link_file(extension, template, newline, embed_filename):
2472 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2473 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2474 self.to_screen('[info] Internet shortcut is already present')
2475 else:
2476 try:
2477 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2478 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2479 template_vars = {'url': ascii_url}
2480 if embed_filename:
2481 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2482 linkfile.write(template % template_vars)
2483 except (OSError, IOError):
2484 self.report_error('Cannot write internet shortcut ' + linkfn)
2485 return False
2486 return True
2487
2488 if url_link:
2489 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2490 return
2491 if webloc_link:
2492 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2493 return
2494 if desktop_link:
2495 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2496 return
2497
2498 try:
2499 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2500 except PostProcessingError as err:
2501 self.report_error('Preprocessing: %s' % str(err))
2502 return
2503
2504 must_record_download_archive = False
2505 if self.params.get('skip_download', False):
2506 info_dict['filepath'] = temp_filename
2507 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2508 info_dict['__files_to_move'] = files_to_move
2509 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2510 else:
2511 # Download
2512 try:
2513
                def existing_file(*filepaths):
                    """Return an already-downloaded file to reuse, or None.

                    Checks each candidate path (and, when a post-conversion
                    extension is configured, its converted counterpart). When
                    overwrites are enabled, all matches are deleted and None is
                    returned so the download proceeds from scratch.
                    """
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            # A postprocessor will convert to final_ext, so a
                            # converted file also counts as already downloaded
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    # Reuse the first match and correct the recorded extension
                    self.report_file_already_downloaded(existing_files[0])
                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]
2535
2536 success = True
2537 if info_dict.get('requested_formats') is not None:
2538
2539 def compatible_formats(formats):
2540 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2541 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2542 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2543 if len(video_formats) > 2 or len(audio_formats) > 2:
2544 return False
2545
2546 # Check extension
2547 exts = set(format.get('ext') for format in formats)
2548 COMPATIBLE_EXTS = (
2549 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2550 set(('webm',)),
2551 )
2552 for ext_sets in COMPATIBLE_EXTS:
2553 if ext_sets.issuperset(exts):
2554 return True
2555 # TODO: Check acodec/vcodec
2556 return False
2557
2558 requested_formats = info_dict['requested_formats']
2559 old_ext = info_dict['ext']
2560 if self.params.get('merge_output_format') is None:
2561 if not compatible_formats(requested_formats):
2562 info_dict['ext'] = 'mkv'
2563 self.report_warning(
2564 'Requested formats are incompatible for merge and will be merged into mkv.')
2565 if (info_dict['ext'] == 'webm'
2566 and self.params.get('writethumbnail', False)
2567 and info_dict.get('thumbnails')):
2568 info_dict['ext'] = 'mkv'
2569 self.report_warning(
2570 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2571
                    def correct_ext(filename):
                        # Replace the pre-merge extension (when it matches the
                        # original ext) with the final merged container's ext
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext == old_ext
                            else filename)
                        return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2579
2580 # Ensure filename always has a correct extension for successful merge
2581 full_filename = correct_ext(full_filename)
2582 temp_filename = correct_ext(temp_filename)
2583 dl_filename = existing_file(full_filename, temp_filename)
2584 info_dict['__real_download'] = False
2585
2586 _protocols = set(determine_protocol(f) for f in requested_formats)
2587 if len(_protocols) == 1:
2588 info_dict['protocol'] = _protocols.pop()
2589 directly_mergable = (
2590 'no-direct-merge' not in self.params.get('compat_opts', [])
2591 and info_dict.get('protocol') is not None # All requested formats have same protocol
2592 and not self.params.get('allow_unplayable_formats')
2593 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2594 if directly_mergable:
2595 info_dict['url'] = requested_formats[0]['url']
2596 # Treat it as a single download
2597 dl_filename = existing_file(full_filename, temp_filename)
2598 if dl_filename is None:
2599 success, real_download = self.dl(temp_filename, info_dict)
2600 info_dict['__real_download'] = real_download
2601 else:
2602 downloaded = []
2603 merger = FFmpegMergerPP(self)
2604 if self.params.get('allow_unplayable_formats'):
2605 self.report_warning(
2606 'You have requested merging of multiple formats '
2607 'while also allowing unplayable formats to be downloaded. '
2608 'The formats won\'t be merged to prevent data corruption.')
2609 elif not merger.available:
2610 self.report_warning(
2611 'You have requested merging of multiple formats but ffmpeg is not installed. '
2612 'The formats won\'t be merged.')
2613
2614 if dl_filename is None:
2615 for f in requested_formats:
2616 new_info = dict(info_dict)
2617 del new_info['requested_formats']
2618 new_info.update(f)
2619 fname = prepend_extension(
2620 self.prepare_filename(new_info, 'temp'),
2621 'f%s' % f['format_id'], new_info['ext'])
2622 if not self._ensure_dir_exists(fname):
2623 return
2624 downloaded.append(fname)
2625 partial_success, real_download = self.dl(fname, new_info)
2626 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2627 success = success and partial_success
2628 if merger.available and not self.params.get('allow_unplayable_formats'):
2629 info_dict['__postprocessors'].append(merger)
2630 info_dict['__files_to_merge'] = downloaded
2631 # Even if there were no downloads, it is being merged only now
2632 info_dict['__real_download'] = True
2633 else:
2634 for file in downloaded:
2635 files_to_move[file] = None
2636 else:
2637 # Just a single file
2638 dl_filename = existing_file(full_filename, temp_filename)
2639 if dl_filename is None:
2640 success, real_download = self.dl(temp_filename, info_dict)
2641 info_dict['__real_download'] = real_download
2642
2643 dl_filename = dl_filename or temp_filename
2644 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2645
2646 except network_exceptions as err:
2647 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2648 return
2649 except (OSError, IOError) as err:
2650 raise UnavailableVideoError(err)
2651 except (ContentTooShortError, ) as err:
2652 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2653 return
2654
2655 if success and full_filename != '-':
2656 # Fixup content
2657 fixup_policy = self.params.get('fixup')
2658 if fixup_policy is None:
2659 fixup_policy = 'detect_or_warn'
2660
2661 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2662
2663 stretched_ratio = info_dict.get('stretched_ratio')
2664 if stretched_ratio is not None and stretched_ratio != 1:
2665 if fixup_policy == 'warn':
2666 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2667 info_dict['id'], stretched_ratio))
2668 elif fixup_policy == 'detect_or_warn':
2669 stretched_pp = FFmpegFixupStretchedPP(self)
2670 if stretched_pp.available:
2671 info_dict['__postprocessors'].append(stretched_pp)
2672 else:
2673 self.report_warning(
2674 '%s: Non-uniform pixel ratio (%s). %s'
2675 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2676 else:
2677 assert fixup_policy in ('ignore', 'never')
2678
2679 if (info_dict.get('requested_formats') is None
2680 and info_dict.get('container') == 'm4a_dash'
2681 and info_dict.get('ext') == 'm4a'):
2682 if fixup_policy == 'warn':
2683 self.report_warning(
2684 '%s: writing DASH m4a. '
2685 'Only some players support this container.'
2686 % info_dict['id'])
2687 elif fixup_policy == 'detect_or_warn':
2688 fixup_pp = FFmpegFixupM4aPP(self)
2689 if fixup_pp.available:
2690 info_dict['__postprocessors'].append(fixup_pp)
2691 else:
2692 self.report_warning(
2693 '%s: writing DASH m4a. '
2694 'Only some players support this container. %s'
2695 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2696 else:
2697 assert fixup_policy in ('ignore', 'never')
2698
2699 if ('protocol' in info_dict
2700 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2701 if fixup_policy == 'warn':
2702 self.report_warning('%s: malformed AAC bitstream detected.' % (
2703 info_dict['id']))
2704 elif fixup_policy == 'detect_or_warn':
2705 fixup_pp = FFmpegFixupM3u8PP(self)
2706 if fixup_pp.available:
2707 info_dict['__postprocessors'].append(fixup_pp)
2708 else:
2709 self.report_warning(
2710 '%s: malformed AAC bitstream detected. %s'
2711 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2712 else:
2713 assert fixup_policy in ('ignore', 'never')
2714
2715 try:
2716 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2717 except PostProcessingError as err:
2718 self.report_error('Postprocessing: %s' % str(err))
2719 return
2720 try:
2721 for ph in self._post_hooks:
2722 ph(info_dict['filepath'])
2723 except Exception as err:
2724 self.report_error('post hooks: %s' % str(err))
2725 return
2726 must_record_download_archive = True
2727
2728 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2729 self.record_download_archive(info_dict)
2730 max_downloads = self.params.get('max_downloads')
2731 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2732 raise MaxDownloadsReached()
2733
2734 def download(self, url_list):
2735 """Download a given list of URLs."""
2736 outtmpl = self.outtmpl_dict['default']
2737 if (len(url_list) > 1
2738 and outtmpl != '-'
2739 and '%' not in outtmpl
2740 and self.params.get('max_downloads') != 1):
2741 raise SameFileError(outtmpl)
2742
2743 for url in url_list:
2744 try:
2745 # It also downloads the videos
2746 res = self.extract_info(
2747 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2748 except UnavailableVideoError:
2749 self.report_error('unable to download video')
2750 except MaxDownloadsReached:
2751 self.to_screen('[info] Maximum number of downloaded files reached')
2752 raise
2753 except ExistingVideoReached:
2754 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2755 raise
2756 except RejectedVideoReached:
2757 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2758 raise
2759 else:
2760 if self.params.get('dump_single_json', False):
2761 self.post_extract(res)
2762 self.to_stdout(json.dumps(res, default=repr))
2763
2764 return self._download_retcode
2765
    def download_with_info_file(self, info_filename):
        """(Re-)download using a previously written info JSON file.

        Falls back to a fresh extraction from the stored 'webpage_url' when
        processing the saved info fails. Returns the download return code.
        """
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
        try:
            self.process_ie_result(info, download=True)
        except (DownloadError, EntryNotInPlaylist):
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                # The stored info may be stale; retry from the original URL
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            else:
                raise
        return self._download_retcode
2782
2783 @staticmethod
2784 def filter_requested_info(info_dict, actually_filter=True):
2785 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2786 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2787 if actually_filter:
2788 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2789 empty_values = (None, {}, [], set(), tuple())
2790 reject = lambda k, v: k not in keep_keys and (
2791 k.startswith('_') or k in remove_keys or v in empty_values)
2792 else:
2793 info_dict['epoch'] = int(time.time())
2794 reject = lambda k, v: k in remove_keys
2795 filter_fn = lambda obj: (
2796 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
2797 else obj if not isinstance(obj, dict)
2798 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2799 return filter_fn(info_dict)
2800
2801 def run_pp(self, pp, infodict):
2802 files_to_delete = []
2803 if '__files_to_move' not in infodict:
2804 infodict['__files_to_move'] = {}
2805 files_to_delete, infodict = pp.run(infodict)
2806 if not files_to_delete:
2807 return infodict
2808
2809 if self.params.get('keepvideo', False):
2810 for f in files_to_delete:
2811 infodict['__files_to_move'].setdefault(f, '')
2812 else:
2813 for old_filename in set(files_to_delete):
2814 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2815 try:
2816 os.remove(encodeFilename(old_filename))
2817 except (IOError, OSError):
2818 self.report_warning('Unable to remove downloaded original file')
2819 if old_filename in infodict['__files_to_move']:
2820 del infodict['__files_to_move'][old_filename]
2821 return infodict
2822
2823 @staticmethod
2824 def post_extract(info_dict):
2825 def actual_post_extract(info_dict):
2826 if info_dict.get('_type') in ('playlist', 'multi_video'):
2827 for video_dict in info_dict.get('entries', {}):
2828 actual_post_extract(video_dict or {})
2829 return
2830
2831 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2832 extra = post_extractor().items()
2833 info_dict.update(extra)
2834 info_dict.pop('__post_extractor', None)
2835
2836 original_infodict = info_dict.get('__original_infodict') or {}
2837 original_infodict.update(extra)
2838 original_infodict.pop('__post_extractor', None)
2839
2840 actual_post_extract(info_dict or {})
2841
2842 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2843 info = dict(ie_info)
2844 info['__files_to_move'] = files_to_move or {}
2845 for pp in self._pps[key]:
2846 info = self.run_pp(pp, info)
2847 return info, info.pop('__files_to_move', None)
2848
2849 def post_process(self, filename, ie_info, files_to_move=None):
2850 """Run all the postprocessors on the given file."""
2851 info = dict(ie_info)
2852 info['filepath'] = filename
2853 info['__files_to_move'] = files_to_move or {}
2854
2855 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2856 info = self.run_pp(pp, info)
2857 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2858 del info['__files_to_move']
2859 for pp in self._pps['after_move']:
2860 info = self.run_pp(pp, info)
2861 return info
2862
2863 def _make_archive_id(self, info_dict):
2864 video_id = info_dict.get('id')
2865 if not video_id:
2866 return
2867 # Future-proof against any change in case
2868 # and backwards compatibility with prior versions
2869 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2870 if extractor is None:
2871 url = str_or_none(info_dict.get('url'))
2872 if not url:
2873 return
2874 # Try to find matching extractor for the URL and take its ie_key
2875 for ie in self._ies:
2876 if ie.suitable(url):
2877 extractor = ie.ie_key()
2878 break
2879 else:
2880 return
2881 return '%s %s' % (extractor.lower(), video_id)
2882
2883 def in_download_archive(self, info_dict):
2884 fn = self.params.get('download_archive')
2885 if fn is None:
2886 return False
2887
2888 vid_id = self._make_archive_id(info_dict)
2889 if not vid_id:
2890 return False # Incomplete video information
2891
2892 return vid_id in self.archive
2893
    def record_download_archive(self, info_dict):
        """Append the video's archive id to the archive file (if configured)."""
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        assert vid_id
        # Locked append so concurrent processes don't interleave writes
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
        # Keep the in-memory archive in sync with the file
        self.archive.add(vid_id)
2903
2904 @staticmethod
2905 def format_resolution(format, default='unknown'):
2906 if format.get('vcodec') == 'none':
2907 return 'audio only'
2908 if format.get('resolution') is not None:
2909 return format['resolution']
2910 if format.get('width') and format.get('height'):
2911 res = '%dx%d' % (format['width'], format['height'])
2912 elif format.get('height'):
2913 res = '%sp' % format['height']
2914 elif format.get('width'):
2915 res = '%dx?' % format['width']
2916 else:
2917 res = default
2918 return res
2919
    def _format_note(self, fdict):
        """Build the 'note' column (old-style format listing) from a format dict.

        Appends fragments in a fixed order, inserting separators only when
        something has already been written.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec name with the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec name known, but both bitrates present: label the video one
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2975
2976 def _format_note_table(self, f):
2977 def join_fields(*vargs):
2978 return ', '.join((val for val in vargs if val != ''))
2979
2980 return join_fields(
2981 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2982 format_field(f, 'language', '[%s]'),
2983 format_field(f, 'format_note'),
2984 format_field(f, 'container', ignore=(None, f.get('ext'))),
2985 format_field(f, 'asr', '%5dHz'))
2986
    def list_formats(self, info_dict):
        """Print a table of the available formats for a video."""
        formats = info_dict.get('formats', [info_dict])
        # The old (youtube-dl style) layout is used only when requested via
        # compat_opts or list_formats_as_table=False
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('list_formats_as_table', True) is not False)
        if new_format:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    self._format_note_table(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
        else:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
                header_line,
                table,
                delim=new_format,
                extraGap=(0 if new_format else 1),
                hideEmpty=new_format)))
3032
3033 def list_thumbnails(self, info_dict):
3034 thumbnails = info_dict.get('thumbnails')
3035 if not thumbnails:
3036 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3037 return
3038
3039 self.to_screen(
3040 '[info] Thumbnails for %s:' % info_dict['id'])
3041 self.to_screen(render_table(
3042 ['ID', 'width', 'height', 'URL'],
3043 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3044
    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        """Print a table of available subtitle (or caption) tracks."""
        if not subtitles:
            self.to_screen('%s has no %s' % (video_id, name))
            return
        self.to_screen(
            'Available %s for %s:' % (name, video_id))

        def _row(lang, formats):
            # Formats are shown in reverse list order
            # (NOTE(review): presumably worst-to-best in the source list — confirm)
            exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
            if len(set(names)) == 1:
                # Collapse the name column when every entry shares one name
                names = [] if names[0] == 'unknown' else names[:1]
            return [lang, ', '.join(names), ', '.join(exts)]

        self.to_screen(render_table(
            ['Language', 'Name', 'Formats'],
            [_row(lang, formats) for lang, formats in subtitles.items()],
            hideEmpty=True))
3062
3063 def urlopen(self, req):
3064 """ Start an HTTP download """
3065 if isinstance(req, compat_basestring):
3066 req = sanitized_Request(req)
3067 return self._opener.open(req, timeout=self._socket_timeout)
3068
    def print_debug_header(self):
        """Print verbose debugging information (versions, encodings, proxies)."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # Detect how yt-dlp was packaged/launched
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when running under PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE: unreachable — the early return above deliberately disables
            # the upstream youtube-dl update check below
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3158
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, TLS, redirects) used by urlopen."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout of 10 minutes
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables all proxying
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3211
3212 def encode(self, s):
3213 if isinstance(s, bytes):
3214 return s # Already encoded
3215
3216 try:
3217 return s.encode(self.get_encoding())
3218 except UnicodeEncodeError as err:
3219 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3220 raise
3221
3222 def get_encoding(self):
3223 encoding = self.params.get('encoding')
3224 if encoding is None:
3225 encoding = preferredencoding()
3226 return encoding
3227
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download thumbnail(s) next to *filename*; return the written extensions."""
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Only suffix filenames with the thumbnail id when several are written
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # When writing a single thumbnail, iterate in reverse so the loop can
        # stop at the first success (NOTE(review): presumably the list is
        # ordered worst-to-best — confirm against the extractor convention)
        for t in thumbnails[::1 if write_all else -1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Thumbnails are best-effort: warn and move on
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            if ret and not write_all:
                break
        return ret