X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/d5d1df8afdd532cc889f9d95be0740668a0776fe..7b5674949fd03a33b47b67b31d56a5adf1c48c91:/yt_dlp/__init__.py diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 726fb0685..3d606bcba 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,10 +1,10 @@ -try: - import contextvars # noqa: F401 -except Exception: - raise Exception( - f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541 +import sys + +if sys.version_info < (3, 8): + raise ImportError( + f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541 -__license__ = 'Public Domain' +__license__ = 'The Unlicense' import collections import getpass @@ -12,15 +12,14 @@ import optparse import os import re -import sys +import traceback -from .compat import compat_shlex_quote +from .compat import compat_os_name, compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS -from .downloader import FileDownloader from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO -from .extractor.common import InfoExtractor +from .networking.impersonate import ImpersonateTarget from .options import parseOpts from .postprocessor import ( FFmpegExtractAudioPP, @@ -40,6 +39,7 @@ DateRange, DownloadCancelled, DownloadError, + FormatSorter, GeoUtils, PlaylistEntries, SameFileError, @@ -49,18 +49,20 @@ float_or_none, format_field, int_or_none, + join_nonempty, match_filter_func, + parse_bytes, parse_duration, preferredencoding, read_batch_urls, read_stdin, render_table, setproctitle, - std_headers, traverse_obj, variadic, write_string, ) +from .utils.networking import std_headers from .YoutubeDL import YoutubeDL _IN_CLI = False @@ -73,14 +75,16 @@ def _exit(status=0, *args): def get_urls(urls, batchfile, verbose): - # Batch file verification + """ + @param verbose -1: quiet, 0: normal, 1: verbose + """ batch_urls = [] if batchfile is not None: try: batch_urls = read_batch_urls( - read_stdin('URLs') if batchfile == '-' + read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-' else open(expand_path(batchfile), encoding='utf-8', errors='ignore')) - if verbose: + if verbose == 1: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except OSError: _exit(f'ERROR: batch file {batchfile} could not be read') @@ -91,12 +95,11 @@ def get_urls(urls, batchfile, verbose): def print_extractor_information(opts, urls): - # Importing GenericIE is currently slow since it imports other extractors - # TODO: Move this back to module level after generalization of embed detection - from .extractor.generic import GenericIE - out = '' if opts.list_extractors: + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE + urls = dict.fromkeys(urls, False) for ie in list_extractor_classes(opts.age_limit): out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' @@ -152,7 +155,7 @@ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): else: opts.embed_infojson = False if 'format-sort' in opts.compat_opts: - opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) + opts.format_sort.extend(FormatSorter.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: @@ -188,8 +191,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None): raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"') # Usernames and passwords - validate(not opts.usenetrc or (opts.username is None and opts.password is None), - '.netrc', msg='using {name} conflicts with giving username/password') + validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc', + msg='{name}, netrc command and username/password are mutually exclusive options') validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing') validate(opts.ap_password is None or opts.ap_username is not None, 'TV Provider account username', msg='{name} missing') @@ -227,7 +230,7 @@ def validate_minmax(min_val, max_val, min_name, max_name=None): # Format sort for f in opts.format_sort: - validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) + validate_regex('format sorting', f, FormatSorter.regex) # Postprocessor formats validate_regex('merge output format', opts.merge_output_format, @@ -281,19 +284,19 @@ def parse_sleep_func(expr): raise ValueError(f'invalid {key} retry sleep expression {expr!r}') # Bytes - def parse_bytes(name, value): + def validate_bytes(name, value): if value is None: return None - numeric_limit = FileDownloader.parse_bytes(value) + numeric_limit = parse_bytes(value) validate(numeric_limit is not None, 'rate limit', value) return numeric_limit - opts.ratelimit = parse_bytes('rate limit', opts.ratelimit) - opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) - opts.min_filesize = parse_bytes('min filesize', opts.min_filesize) - opts.max_filesize = parse_bytes('max filesize', opts.max_filesize) - opts.buffersize = parse_bytes('buffer size', opts.buffersize) - opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size) + opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) + opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) + opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) + opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) + opts.buffersize = validate_bytes('buffer size', opts.buffersize) + opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) # Output templates def validate_outtmpl(tmpl, msg): @@ -319,31 +322,50 @@ def validate_outtmpl(tmpl, msg): if outtmpl_default == '': opts.skip_download = None del opts.outtmpl['default'] - if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: - raise ValueError( - 'Cannot download a video and extract audio into the same file! ' - f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template') - def parse_chapters(name, value): - chapters, ranges = [], [] + def parse_chapters(name, value, advanced=False): parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x) + TIMESTAMP_RE = r'''(?x)(?: + (?P-?)(?P[^-]+) + )?\s*-\s*(?: + (?P-?)(?P[^-]+) + )?''' + + chapters, ranges, from_url = [], [], False for regex in value or []: - if regex.startswith('*'): - for range_ in map(str.strip, regex[1:].split(',')): - mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_) - dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf')) - if None in (dur or [None]): - raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end') - ranges.append(dur) + if advanced and regex == '*from-url': + from_url = True + continue + elif not regex.startswith('*'): + try: + chapters.append(re.compile(regex)) + except re.error as err: + raise ValueError(f'invalid {name} regex "{regex}" - {err}') continue - try: - chapters.append(re.compile(regex)) - except re.error as err: - raise ValueError(f'invalid {name} regex "{regex}" - {err}') - return chapters, ranges - opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters) - opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges)) + for range_ in map(str.strip, regex[1:].split(',')): + mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_) + dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')] + signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign')) + + err = None + if None in (dur or [None]): + err = 'Must be of the form "*start-end"' + elif not advanced and any(signs): + err = 'Negative timestamps are not allowed' + else: + dur[0] *= -1 if signs[0] else 1 + dur[1] *= -1 if signs[1] else 1 + if dur[1] == float('-inf'): + err = '"-inf" is not a valid end' + if err: + raise ValueError(f'invalid {name} time range "{regex}". {err}') + ranges.append(dur) + + return chapters, ranges, from_url + + opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters) + opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True)) # Cookies from browser if opts.cookiesfrombrowser: @@ -351,7 +373,7 @@ def parse_chapters(name, value): mobj = re.fullmatch(r'''(?x) (?P[^+:]+) (?:\s*\+\s*(?P[^:]+))? - (?:\s*:\s*(?P.+?))? + (?:\s*:\s*(?!:)(?P.+?))? (?:\s*::\s*(?P.+))? ''', opts.cookiesfrombrowser) if mobj is None: @@ -368,6 +390,9 @@ def parse_chapters(name, value): f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') opts.cookiesfrombrowser = (browser_name, profile, keyring, container) + if opts.impersonate is not None: + opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower()) + # MetadataParser def metadataparser_actions(f): if isinstance(f, str): @@ -387,10 +412,12 @@ def metadataparser_actions(f): raise ValueError(f'{cmd} is invalid; {err}') yield action - parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } # Other options if opts.playlist_items is not None: @@ -399,14 +426,19 @@ def metadataparser_actions(f): except Exception as err: raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}') - geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country - if geo_bypass_code is not None: + opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None + if opts.geo_bypass.lower() not in ('default', 'never'): try: - GeoUtils.random_ipv4(geo_bypass_code) + GeoUtils.random_ipv4(opts.geo_bypass) except Exception: - raise ValueError('unsupported geo-bypass country or ip-block') + raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"') + if len(opts.geo_bypass) == 2: + opts.geo_bypass_country = opts.geo_bypass + else: + opts.geo_bypass_ip_block = opts.geo_bypass + opts.geo_bypass = opts.geo_bypass.lower() != 'never' - opts.match_filter = match_filter_func(opts.match_filter) + opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter) if opts.download_archive is not None: opts.download_archive = expand_path(opts.download_archive) @@ -433,6 +465,10 @@ def metadataparser_actions(f): elif ed and proto == 'default': default_downloader = ed.get_basename() + for policy in opts.color.values(): + if policy not in ('always', 'auto', 'no_color', 'never'): + raise ValueError(f'"{policy}" is not a valid color policy') + warnings, deprecation_warnings = [], [] # Common mistake: -f best @@ -562,11 +598,11 @@ def report_deprecation(val, old, new=None): def get_postprocessors(opts): yield from opts.add_postprocessors - if opts.parse_metadata: + for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - 'when': 'pre_process' + 'actions': actions, + 'when': when } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -692,7 +728,7 @@ def get_postprocessors(opts): def parse_options(argv=None): """@returns ParsedOptions(parser, opts, urls, ydl_opts)""" parser, opts, urls = parseOpts(argv) - urls = get_urls(urls, opts.batchfile, opts.verbose) + urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose) set_compat_opts(opts) try: @@ -702,11 +738,13 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' )) + if opts.quiet is None: + opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist'] write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson @@ -732,6 +770,7 @@ def parse_options(argv=None): return ParsedOptions(parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, + 'netrc_cmd': opts.netrc_cmd, 'username': opts.username, 'password': opts.password, 'twofactor': opts.twofactor, @@ -742,7 +781,7 @@ def parse_options(argv=None): 'client_certificate': opts.client_certificate, 'client_certificate_key': opts.client_certificate_key, 'client_certificate_password': opts.client_certificate_password, - 'quiet': opts.quiet or any_getting or opts.print_json or bool(opts.forceprint), + 'quiet': opts.quiet, 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, @@ -797,6 +836,7 @@ def parse_options(argv=None): 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, 'progress_with_newline': opts.progress_with_newline, 'progress_template': opts.progress_template, + 'progress_delta': opts.progress_delta, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, @@ -854,6 +894,7 @@ def parse_options(argv=None): 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, + 'enable_file_urls': opts.enable_file_urls, 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, @@ -876,6 +917,7 @@ def parse_options(argv=None): 'postprocessors': postprocessors, 'fixup': opts.fixup, 'source_address': opts.source_address, + 'impersonate': opts.impersonate, 'call_home': opts.call_home, 'sleep_interval_requests': opts.sleep_interval_requests, 'sleep_interval': opts.sleep_interval, @@ -888,7 +930,7 @@ def parse_options(argv=None): 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, 'match_filter': opts.match_filter, - 'no_color': opts.no_color, + 'color': opts.color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'hls_use_mpegts': opts.hls_use_mpegts, @@ -932,20 +974,80 @@ def _real_main(argv=None): if opts.rm_cachedir: ydl.cache.remove() - updater = Updater(ydl) - if opts.update_self and updater.update() and actual_use: - if updater.cmd: - return updater.restart() - # This code is reachable only for zip variant in py < 3.10 - # It makes sense to exit here, but the old behavior is to continue - ydl.report_warning('Restart yt-dlp to use the updated version') - # return 100, 'ERROR: The program must exit for the update to complete' + try: + updater = Updater(ydl, opts.update_self) + if opts.update_self and updater.update() and actual_use: + if updater.cmd: + return updater.restart() + # This code is reachable only for zip variant in py < 3.10 + # It makes sense to exit here, but the old behavior is to continue + ydl.report_warning('Restart yt-dlp to use the updated version') + # return 100, 'ERROR: The program must exit for the update to complete' + except Exception: + traceback.print_exc() + ydl._download_retcode = 100 + + if opts.list_impersonate_targets: + + known_targets = [ + # List of simplified targets we know are supported, + # to help users know what dependencies may be required. + (ImpersonateTarget('chrome'), 'curl_cffi'), + (ImpersonateTarget('edge'), 'curl_cffi'), + (ImpersonateTarget('safari'), 'curl_cffi'), + ] + + available_targets = ydl._get_available_impersonate_targets() + + def make_row(target, handler): + return [ + join_nonempty(target.client.title(), target.version, delim='-') or '-', + join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-', + handler, + ] + + rows = [make_row(target, handler) for target, handler in available_targets] + + for known_target, known_handler in known_targets: + if not any( + known_target in target and handler == known_handler + for target, handler in available_targets + ): + rows.append([ + ydl._format_out(text, ydl.Styles.SUPPRESS) + for text in make_row(known_target, f'{known_handler} (not available)') + ]) + + ydl.to_screen('[info] Available impersonate targets') + ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-')) + return if not actual_use: if pre_process: return ydl._download_retcode - ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) + args = sys.argv[1:] if argv is None else argv + ydl.warn_if_short_id(args) + + # Show a useful error message and wait for keypress if not launched from shell on Windows + if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): + import ctypes.wintypes + import msvcrt + + kernel32 = ctypes.WinDLL('Kernel32') + + buffer = (1 * ctypes.wintypes.DWORD)() + attached_processes = kernel32.GetConsoleProcessList(buffer, 1) + # If we only have a single process attached, then the executable was double clicked + # When using `pyinstaller` with `--onefile`, two processes get attached + is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') + if attached_processes == 1 or is_onefile and attached_processes == 2: + print(parser._generate_error_message( + 'Do not double-click the executable, instead call it from a command line.\n' + 'Please read the README for further information on how to use yt-dlp: ' + 'https://github.com/yt-dlp/yt-dlp#readme')) + msvcrt.getch() + _exit(2) parser.error( 'You must provide at least one URL.\n' 'Type yt-dlp --help to see a list of all options.') @@ -953,6 +1055,8 @@ def _real_main(argv=None): parser.destroy() try: if opts.load_info_filename is not None: + if all_urls: + ydl.report_warning('URLs are ignored due to --load-info-json') return ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: return ydl.download(all_urls)