4 from __future__
import unicode_literals
39 import xml
.etree
.ElementTree
43 compat_HTMLParseError
,
49 compat_ctypes_WINFUNCTYPE
,
50 compat_etree_fromstring
,
53 compat_html_entities_html5
,
66 compat_urllib_parse_urlencode
,
67 compat_urllib_parse_urlparse
,
68 compat_urllib_parse_urlunparse
,
69 compat_urllib_parse_quote
,
70 compat_urllib_parse_quote_plus
,
71 compat_urllib_parse_unquote_plus
,
72 compat_urllib_request
,
def register_socks_protocols():
    """Teach urlsplit()/urlparse() that SOCKS URL schemes carry a netloc.

    In Python < 2.6.5, urlsplit() suffers from the bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so the SOCKS schemes
    are appended to that registry here.
    """
    registry = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in registry:
            registry.append(scheme)
# This is not clearly defined otherwise
# Type object of a compiled regular-expression pattern, obtained by
# compiling an empty pattern; presumably used for isinstance() checks
# elsewhere in the module.
compiled_regex_type = type(re.compile(''))
96 def random_user_agent():
97 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1676 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1680 'User-Agent': random_user_agent(),
1681 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1682 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1683 'Accept-Encoding': 'gzip, deflate',
1684 'Accept-Language': 'en-us,en;q=0.5',
1689 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Unique sentinel for "no default supplied"; compared with `is NO_DEFAULT`
# so that None remains a valid caller-provided default value.
NO_DEFAULT = object()
# Full English month names in calendar order (index 0 == January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1700 'en': ENGLISH_MONTH_NAMES
,
1702 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1703 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1706 KNOWN_EXTENSIONS
= (
1707 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1708 'flv', 'f4v', 'f4a', 'f4b',
1709 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1710 'mkv', 'mka', 'mk3d',
1713 'asf', 'wmv', 'wma',
1719 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
# Maps each accented/special Latin character to an ASCII replacement;
# replacements are single letters or multi-letter strings ('AE', 'OE',
# 'TH', 'ss', ...), which is why lists are interleaved into the chain.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1748 '%Y/%m/%d %H:%M:%S',
1752 '%Y-%m-%d %H:%M:%S',
1753 '%Y-%m-%d %H:%M:%S.%f',
1754 '%Y-%m-%d %H:%M:%S:%f',
1757 '%Y-%m-%dT%H:%M:%SZ',
1758 '%Y-%m-%dT%H:%M:%S.%fZ',
1759 '%Y-%m-%dT%H:%M:%S.%f0Z',
1760 '%Y-%m-%dT%H:%M:%S',
1761 '%Y-%m-%dT%H:%M:%S.%f',
1763 '%b %d %Y at %H:%M',
1764 '%b %d %Y at %H:%M:%S',
1765 '%B %d %Y at %H:%M',
1766 '%B %d %Y at %H:%M:%S',
1770 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1771 DATE_FORMATS_DAY_FIRST
.extend([
1777 '%d/%m/%Y %H:%M:%S',
1780 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1781 DATE_FORMATS_MONTH_FIRST
.extend([
1786 '%m/%d/%Y %H:%M:%S',
# Matches the tail of a "packed" JavaScript unpacker call of the form
# }('<payload>',<radix>,<count>,'<sym|bol|table>'.split('|')
# capturing payload, radix, count and the '|'-separated symbol table.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Case-insensitive, dot-matches-newline pattern extracting the body of a
# <script type="application/ld+json"> tag into the named group 'json_ld'.
# The backreference \1 forces the closing quote of the type attribute to
# match its (possibly absent) opening quote.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1793 def preferredencoding():
1794 """Get preferred encoding.
1796 Returns the best encoding scheme for the system, based on
1797 locale.getpreferredencoding() and some further tweaks.
1800 pref = locale.getpreferredencoding()
1808 def write_json_file(obj, fn):
1809 """ Encode obj as JSON and write it to fn, atomically if possible """
1811 fn = encodeFilename(fn)
1812 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1813 encoding = get_filesystem_encoding()
1814 # os.path.basename returns a bytes object, but NamedTemporaryFile
1815 # will fail if the filename contains non ascii characters unless we
1816 # use a unicode object
1817 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1818 # the same for os.path.dirname
1819 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1821 path_basename = os.path.basename
1822 path_dirname = os.path.dirname
1826 'prefix
': path_basename(fn) + '.',
1827 'dir': path_dirname(fn),
1831 # In Python 2.x, json.dump expects a bytestream.
1832 # In Python 3.x, it writes to a character stream
1833 if sys.version_info < (3, 0):
1838 'encoding
': 'utf
-8',
1841 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1846 if sys.platform == 'win32
':
1847 # Need to remove existing file on Windows, else os.rename raises
1848 # WindowsError or FileExistsError.
1856 os.chmod(tf.name, 0o666 & ~mask)
1859 os.rename(tf.name, fn)
1868 if sys.version_info >= (2, 7):
1869 def find_xpath_attr(node, xpath, key, val=None):
1870 """ Find the xpath xpath[@key=val] """
1871 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1872 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1873 return node.find(expr)
1875 def find_xpath_attr(node, xpath, key, val=None):
1876 for f in node.findall(compat_xpath(xpath)):
1877 if key not in f.attrib:
1879 if val is None or f.attrib.get(key) == val:
1883 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1884 # the namespace parameter
1887 def xpath_with_ns(path
, ns_map
):
1888 components
= [c
.split(':') for c
in path
.split('/')]
1890 for c
in components
:
1892 replaced
.append(c
[0])
1895 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1896 return '/'.join(replaced
)
1899 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1900 def _find_xpath(xpath
):
1901 return node
.find(compat_xpath(xpath
))
1903 if isinstance(xpath
, (str, compat_str
)):
1904 n
= _find_xpath(xpath
)
1912 if default
is not NO_DEFAULT
:
1915 name
= xpath
if name
is None else name
1916 raise ExtractorError('Could not find XML element %s' % name
)
1922 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1923 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1924 if n
is None or n
== default
:
1927 if default
is not NO_DEFAULT
:
1930 name
= xpath
if name
is None else name
1931 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1937 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1938 n
= find_xpath_attr(node
, xpath
, key
)
1940 if default
is not NO_DEFAULT
:
1943 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1944 raise ExtractorError('Could not find XML attribute %s' % name
)
1947 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document."""
    # Thin wrapper over the generic attribute lookup.
    content = get_element_by_attribute('id', id, html)
    return content
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document."""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying the given attribute/value
    pair in the passed HTML document, or None if there is no match."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list."""
    # Match the class name as a whole token anywhere inside the (possibly
    # space-separated) class attribute value; escaping is disabled because
    # the value passed down is itself a regex.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1973 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1974 """Return the content of the tag with the specified attribute in the passed HTML document"""
1976 value = re.escape(value) if escape_value else value
1979 for m in re.finditer(r'''(?xs)
1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1983 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1987 ''' % (re.escape(attribute), value), html):
1988 res = m.group('content
')
1990 if res.startswith('"') or res.startswith("'"):
1993 retlist.append(unescapeHTML(res))
1998 class HTMLAttributeParser(compat_HTMLParser):
1999 """Trivial HTML parser to gather the attributes for a single element"""
2003 compat_HTMLParser.__init__(self)
2005 def handle_starttag(self, tag, attrs):
2006 self.attrs = dict(attrs)
2009 def extract_attributes(html_element):
2010 """Given a string for an HTML element such as
2012 a="foo" B="bar" c="&98;az" d=boz
2013 empty= noval entity="&"
2016 Decode and return a dictionary of attributes.
2018 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2019 'empty
': '', 'noval
': None, 'entity
': '&',
2020 'sq
': '"', 'dq': '\''
2022 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2023 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2025 parser = HTMLAttributeParser()
2027 parser.feed(html_element)
2029 # Older Python may throw HTMLParseError in case of malformed HTML
2030 except compat_HTMLParseError:
2035 def clean_html(html):
2036 """Clean an HTML snippet into a readable string"""
2038 if html is None: # Convenience for sanitizing descriptions etc.
2042 html = html.replace('\n', ' ')
2043 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2044 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2046 html = re.sub('<.*?>', '', html)
2047 # Replace html entities
2048 html = unescapeHTML(html)
2052 def sanitize_open(filename, open_mode):
2053 """Try to open the given filename, and slightly tweak it if this fails.
2055 Attempts to open the given filename. If this fails, it tries to change
2056 the filename slightly, step by step, until it's either able to open it
2057 or it fails and raises a final exception, like the standard open()
2060 It returns the tuple (stream, definitive_file_name).
2064 if sys.platform == 'win32':
2066 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2067 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2068 stream = open(encodeFilename(filename), open_mode)
2069 return (stream, filename)
2070 except (IOError, OSError) as err:
2071 if err.errno in (errno.EACCES,):
2074 # In case of error, try to remove win32 forbidden chars
2075 alt_filename = sanitize_path(filename)
2076 if alt_filename == filename:
2079 # An exception here should be caught in the caller
2080 stream = open(encodeFilename(alt_filename), open_mode)
2081 return (stream, alt_filename)
2084 def timeconvert(timestr):
2085 """Convert RFC 2822 defined time string into system timestamp"""
2087 timetuple = email.utils.parsedate_tz(timestr)
2088 if timetuple is not None:
2089 timestamp = email.utils.mktime_tz(timetuple)
2093 def sanitize_filename(s, restricted=False, is_id=False):
2094 """Sanitizes a string so it could be used as part of a filename.
2095 If restricted is set, use a stricter subset of allowed characters.
2096 Set is_id if this is not an arbitrary string, but an ID that should be kept
2099 def replace_insane(char):
2100 if restricted and char in ACCENT_CHARS:
2101 return ACCENT_CHARS[char]
2102 elif not restricted and char == '\n':
2104 elif char == '?' or ord(char) < 32 or ord(char) == 127:
2107 return '' if restricted else '\''
2109 return '_
-' if restricted else ' -'
2110 elif char in '\\/|
*<>':
2112 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2114 if restricted
and ord(char
) > 127:
2121 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2122 result
= ''.join(map(replace_insane
, s
))
2124 while '__' in result
:
2125 result
= result
.replace('__', '_')
2126 result
= result
.strip('_')
2127 # Common case of "Foreign band name - English song title"
2128 if restricted
and result
.startswith('-_'):
2130 if result
.startswith('-'):
2131 result
= '_' + result
[len('-'):]
2132 result
= result
.lstrip('.')
2138 def sanitize_path(s
, force
=False):
2139 """Sanitizes and normalizes path on Windows"""
2140 if sys
.platform
== 'win32':
2142 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2143 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2144 drive_or_unc
, _
= os
.path
.splitunc(s
)
2150 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2154 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2155 for path_part
in norm_path
]
2157 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2158 elif force
and s
[0] == os
.path
.sep
:
2159 sanitized_path
.insert(0, os
.path
.sep
)
2160 return os
.path
.join(*sanitized_path
)
2163 def sanitize_url(url
):
2164 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2165 # the number of unwanted failures due to missing protocol
2166 if url
.startswith('//'):
2167 return 'http:%s' % url
2168 # Fix some common typos seen so far
2170 # https://github.com/ytdl-org/youtube-dl/issues/15649
2171 (r
'^httpss://', r
'https://'),
2172 # https://bx1.be/lives/direct-tv/
2173 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2175 for mistake
, fixup
in COMMON_TYPOS
:
2176 if re
.match(mistake
, url
):
2177 return re
.sub(mistake
, fixup
, url
)
2181 def extract_basic_auth(url
):
2182 parts
= compat_urlparse
.urlsplit(url
)
2183 if parts
.username
is None:
2185 url
= compat_urlparse
.urlunsplit(parts
._replace
(netloc
=(
2186 parts
.hostname
if parts
.port
is None
2187 else '%s:%d' % (parts
.hostname
, parts
.port
))))
2188 auth_payload
= base64
.b64encode(
2189 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode('utf-8'))
2190 return url
, 'Basic ' + auth_payload
.decode('utf-8')
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from *url* after sanitizing and escaping it.

    Credentials embedded in the URL (user:pass@host) are removed from the
    URL by extract_basic_auth() and re-applied as a Basic Authorization
    header on the request instead.
    """
    cleaned = escape_url(sanitize_url(url))
    url, auth_header = extract_basic_auth(cleaned)
    if auth_header is not None:
        # Headers may arrive positionally (second positional argument of
        # Request, after data) or via the 'headers' keyword.
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2202 """Expand shell variables and ~"""
2203 return os
.path
.expandvars(compat_expanduser(s
))
2206 def orderedSet(iterable
):
2207 """ Remove all duplicates from the input iterable """
2215 def _htmlentity_transform(entity_with_semicolon
):
2216 """Transforms an HTML entity to a character."""
2217 entity
= entity_with_semicolon
[:-1]
2219 # Known non-numeric HTML entity
2220 if entity
in compat_html_entities
.name2codepoint
:
2221 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2223 # TODO: HTML5 allows entities without a semicolon. For example,
2224 # 'Éric' should be decoded as 'Éric'.
2225 if entity_with_semicolon
in compat_html_entities_html5
:
2226 return compat_html_entities_html5
[entity_with_semicolon
]
2228 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2229 if mobj
is not None:
2230 numstr
= mobj
.group(1)
2231 if numstr
.startswith('x'):
2233 numstr
= '0%s' % numstr
2236 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2238 return compat_chr(int(numstr
, base
))
2242 # Unknown entity in name, return its literal representation
2243 return '&%s;' % entity
2246 def unescapeHTML(s
):
2249 assert type(s
) == compat_str
2252 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2255 def escapeHTML(text
):
2258 .replace('&', '&')
2259 .replace('<', '<')
2260 .replace('>', '>')
2261 .replace('"', '"')
2262 .replace("'", ''')
2266 def process_communicate_or_kill(p
, *args
, **kwargs
):
2268 return p
.communicate(*args
, **kwargs
)
2269 except BaseException
: # Including KeyboardInterrupt
2275 class Popen(subprocess
.Popen
):
2276 if sys
.platform
== 'win32':
2277 _startupinfo
= subprocess
.STARTUPINFO()
2278 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
2282 def __init__(self
, *args
, **kwargs
):
2283 super(Popen
, self
).__init
__(*args
, **kwargs
, startupinfo
=self
._startupinfo
)
2285 def communicate_or_kill(self
, *args
, **kwargs
):
2286 return process_communicate_or_kill(self
, *args
, **kwargs
)
2289 def get_subprocess_encoding():
2290 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2291 # For subprocess calls, encode with locale encoding
2292 # Refer to http://stackoverflow.com/a/9951851/35070
2293 encoding
= preferredencoding()
2295 encoding
= sys
.getfilesystemencoding()
2296 if encoding
is None:
2301 def encodeFilename(s
, for_subprocess
=False):
2303 @param s The name of the file
2306 assert type(s
) == compat_str
2308 # Python 3 has a Unicode API
2309 if sys
.version_info
>= (3, 0):
2312 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2313 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2314 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2315 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2318 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2319 if sys
.platform
.startswith('java'):
2322 return s
.encode(get_subprocess_encoding(), 'ignore')
2325 def decodeFilename(b
, for_subprocess
=False):
2327 if sys
.version_info
>= (3, 0):
2330 if not isinstance(b
, bytes):
2333 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
def decodeArgument(b):
    # Inverse companion of encodeArgument: decode a subprocess argument
    # back to text via the filename-decoding path (for_subprocess=True).
    return decodeFilename(b, True)
2349 def decodeOption(optval
):
2352 if isinstance(optval
, bytes):
2353 optval
= optval
.decode(preferredencoding())
2355 assert isinstance(optval
, compat_str
)
# Named 4-tuple describing a clock time split into components.
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into an (hours, minutes, seconds,
    milliseconds) Time namedtuple."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2369 def formatSeconds(secs
, delim
=':', msec
=False):
2370 time
= timetuple_from_msec(secs
* 1000)
2372 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
2374 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
2376 ret
= '%d' % time
.seconds
2377 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
2380 def _ssl_load_windows_store_certs(ssl_context
, storename
):
2381 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2383 certs
= [cert
for cert
, encoding
, trust
in ssl
.enum_certificates(storename
)
2384 if encoding
== 'x509_asn' and (
2385 trust
is True or ssl
.Purpose
.SERVER_AUTH
.oid
in trust
)]
2386 except PermissionError
:
2390 ssl_context
.load_verify_locations(cadata
=cert
)
2391 except ssl
.SSLError
:
2395 def make_HTTPS_handler(params
, **kwargs
):
2396 opts_check_certificate
= not params
.get('nocheckcertificate')
2397 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLS_CLIENT
)
2398 context
.check_hostname
= opts_check_certificate
2399 context
.verify_mode
= ssl
.CERT_REQUIRED
if opts_check_certificate
else ssl
.CERT_NONE
2400 if opts_check_certificate
:
2402 context
.load_default_certs()
2403 # Work around the issue in load_default_certs when there are bad certificates. See:
2404 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2405 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2406 except ssl
.SSLError
:
2407 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2408 if sys
.platform
== 'win32' and hasattr(ssl
, 'enum_certificates'):
2409 # Create a new context to discard any certificates that were already loaded
2410 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLS_CLIENT
)
2411 context
.check_hostname
, context
.verify_mode
= True, ssl
.CERT_REQUIRED
2412 for storename
in ('CA', 'ROOT'):
2413 _ssl_load_windows_store_certs(context
, storename
)
2414 context
.set_default_verify_paths()
2415 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2418 def bug_reports_message(before
=';'):
2419 if ytdl_is_updateable():
2420 update_cmd
= 'type yt-dlp -U to update'
2422 update_cmd
= 'see https://github.com/yt-dlp/yt-dlp on how to update'
2423 msg
= 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
2424 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2425 msg
+= ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
2427 before
= before
.rstrip()
2428 if not before
or before
.endswith(('.', '!', '?')):
2429 msg
= msg
[0].title() + msg
[1:]
2431 return (before
+ ' ' if before
else '') + msg
# Root of this module's exception hierarchy; catching YoutubeDLError
# catches every error type defined below.
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
# Exception classes treated as network-level failures. ssl.CertificateError
# is appended only where the ssl module actually provides it; the list is
# frozen into a tuple so it can be used directly in except clauses.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2445 class ExtractorError(YoutubeDLError
):
2446 """Error during info extraction."""
2448 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
2449 """ tb, if given, is the original traceback (so that it can be printed out).
2450 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2452 if sys
.exc_info()[0] in network_exceptions
:
2457 self
.expected
= expected
2459 self
.video_id
= video_id
2461 self
.exc_info
= sys
.exc_info() # preserve original exception
2463 super(ExtractorError
, self
).__init
__(''.join((
2464 format_field(ie
, template
='[%s] '),
2465 format_field(video_id
, template
='%s: '),
2467 format_field(cause
, template
=' (caused by %r)'),
2468 '' if expected
else bug_reports_message())))
2470 def format_traceback(self
):
2471 if self
.traceback
is None:
2473 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised for URLs that are not supported; expected=True marks this as
    a normal condition rather than a bug worth reporting."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    # Carries no extra state; exists so callers can catch this case
    # separately from other extraction errors.
2488 class GeoRestrictedError(ExtractorError
):
2489 """Geographic restriction Error exception.
2491 This exception may be thrown when a video is not available from your
2492 geographic location due to geographic restrictions imposed by a website.
2495 def __init__(self
, msg
, countries
=None, **kwargs
):
2496 kwargs
['expected'] = True
2497 super(GeoRestrictedError
, self
).__init
__(msg
, **kwargs
)
2498 self
.countries
= countries
2501 class DownloadError(YoutubeDLError
):
2502 """Download Error exception.
2504 This exception may be thrown by FileDownloader objects if they are not
2505 configured to continue on errors. They will contain the appropriate
2509 def __init__(self
, msg
, exc_info
=None):
2510 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2511 super(DownloadError
, self
).__init
__(msg
)
2512 self
.exc_info
= exc_info
2515 class EntryNotInPlaylist(YoutubeDLError
):
2516 """Entry not in playlist exception.
2518 This exception will be thrown by YoutubeDL when a requested entry
2519 is not found in the playlist info_dict
2524 class SameFileError(YoutubeDLError
):
2525 """Same File exception.
2527 This exception will be thrown by FileDownloader objects if they detect
2528 multiple files would have to be downloaded to the same file on disk.
2533 class PostProcessingError(YoutubeDLError
):
2534 """Post Processing exception.
2536 This exception may be raised by PostProcessor's .run() method to
2537 indicate an error in the postprocessing task.
2540 def __init__(self
, msg
):
2541 super(PostProcessingError
, self
).__init
__(msg
)
2545 class DownloadCancelled(YoutubeDLError
):
2546 """ Exception raised when the download queue should be interrupted """
2547 msg
= 'The download was cancelled'
2549 def __init__(self
, msg
=None):
2552 YoutubeDLError
.__init
__(self
, self
.msg
)
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Class-level msg is used by DownloadCancelled.__init__ as the
    # exception text.
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    # Class-level msg is used by DownloadCancelled.__init__ as the
    # exception text.
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Class-level msg is used by DownloadCancelled.__init__ as the
    # exception text.
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    # NOTE(review): unlike the classes above, this derives directly from
    # YoutubeDLError rather than DownloadCancelled — confirm callers rely
    # on that distinction.
2575 class UnavailableVideoError(YoutubeDLError
):
2576 """Unavailable Format exception.
2578 This exception will be thrown when a video is requested
2579 in a format that is not available for that video.
2584 class ContentTooShortError(YoutubeDLError
):
2585 """Content Too Short exception.
2587 This exception may be raised by FileDownloader objects when a file they
2588 download is too small for what the server announced first, indicating
2589 the connection was probably interrupted.
2592 def __init__(self
, downloaded
, expected
):
2593 super(ContentTooShortError
, self
).__init
__(
2594 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
)
2597 self
.downloaded
= downloaded
2598 self
.expected
= expected
2601 class XAttrMetadataError(YoutubeDLError
):
2602 def __init__(self
, code
=None, msg
='Unknown error'):
2603 super(XAttrMetadataError
, self
).__init
__(msg
)
2607 # Parsing code and msg
2608 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2609 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2610 self
.reason
= 'NO_SPACE'
2611 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2612 self
.reason
= 'VALUE_TOO_LONG'
2614 self
.reason
= 'NOT_SUPPORTED'
2617 class XAttrUnavailableError(YoutubeDLError
):
2621 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2622 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2623 # expected HTTP responses to meet HTTP/1.0 or later (see also
2624 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2625 if sys
.version_info
< (3, 0):
2626 kwargs
['strict'] = True
2627 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2628 source_address
= ydl_handler
._params
.get('source_address')
2630 if source_address
is not None:
2631 # This is to workaround _create_connection() from socket where it will try all
2632 # address data from getaddrinfo() including IPv6. This filters the result from
2633 # getaddrinfo() based on the source_address value.
2634 # This is based on the cpython socket.create_connection() function.
2635 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2636 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2637 host
, port
= address
2639 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2640 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2641 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2642 if addrs
and not ip_addrs
:
2643 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2645 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2646 % (ip_version
, source_address
[0]))
2647 for res
in ip_addrs
:
2648 af
, socktype
, proto
, canonname
, sa
= res
2651 sock
= socket
.socket(af
, socktype
, proto
)
2652 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2653 sock
.settimeout(timeout
)
2654 sock
.bind(source_address
)
2656 err
= None # Explicitly break reference cycle
2658 except socket
.error
as _
:
2660 if sock
is not None:
2665 raise socket
.error('getaddrinfo returns an empty list')
2666 if hasattr(hc
, '_create_connection'):
2667 hc
._create
_connection
= _create_connection
2668 sa
= (source_address
, 0)
2669 if hasattr(hc
, 'source_address'): # Python 2.7+
2670 hc
.source_address
= sa
2672 def _hc_connect(self
, *args
, **kwargs
):
2673 sock
= _create_connection(
2674 (self
.host
, self
.port
), self
.timeout
, sa
)
2676 self
.sock
= ssl
.wrap_socket(
2677 sock
, self
.key_file
, self
.cert_file
,
2678 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2681 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' pseudo-header.

    When the marker is present, a new dict is returned with the marker and
    every Accept-Encoding header (any capitalization) removed; otherwise
    the original mapping is returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    return dict(
        (k, v) for k, v in headers.items()
        if k.lower() != 'accept-encoding' and k != 'Youtubedl-no-compression')
2696 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2697 """Handler for HTTP requests and responses.
2699 This class, when installed with an OpenerDirector, automatically adds
2700 the standard headers to every HTTP request and handles gzipped and
2701 deflated responses from web servers. If compression is to be avoided in
2702 a particular request, the original request in the program code only has
2703 to include the HTTP header "Youtubedl-no-compression", which will be
2704 removed before making the real request.
2706 Part of this code was copied from:
2708 http://techknack.net/python-urllib2-handlers/
2710 Andrew Rowls, the author of that code, agreed to release it to the
    def __init__(self, params, *args, **kwargs):
        # Store the options dict; connection setup below reads settings
        # such as 'source_address' from self._params.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2718 def http_open(self
, req
):
2719 conn_class
= compat_http_client
.HTTPConnection
2721 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2723 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2724 del req
.headers
['Ytdl-socks-proxy']
2726 return self
.do_open(functools
.partial(
2727 _create_http_connection
, self
, conn_class
, False),
2735 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2737 return zlib
.decompress(data
)
2739 def http_request(self
, req
):
2740 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2741 # always respected by websites, some tend to give out URLs with non percent-encoded
2742 # non-ASCII characters (see telemb.py, ard.py [#3412])
2743 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2744 # To work around aforementioned issue we will replace request's original URL with
2745 # percent-encoded one
2746 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2747 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2748 url
= req
.get_full_url()
2749 url_escaped
= escape_url(url
)
2751 # Substitute URL if any change after escaping
2752 if url
!= url_escaped
:
2753 req
= update_Request(req
, url
=url_escaped
)
2755 for h
, v
in std_headers
.items():
2756 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2757 # The dict keys are capitalized because of this bug by urllib
2758 if h
.capitalize() not in req
.headers
:
2759 req
.add_header(h
, v
)
2761 req
.headers
= handle_youtubedl_headers(req
.headers
)
2763 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2764 # Python 2.6 is brain-dead when it comes to fragments
2765 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2766 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2770 def http_response(self
, req
, resp
):
2773 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2774 content
= resp
.read()
2775 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2777 uncompressed
= io
.BytesIO(gz
.read())
2778 except IOError as original_ioerror
:
2779 # There may be junk add the end of the file
2780 # See http://stackoverflow.com/q/4928560/35070 for details
2781 for i
in range(1, 1024):
2783 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2784 uncompressed
= io
.BytesIO(gz
.read())
2789 raise original_ioerror
2790 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2791 resp
.msg
= old_resp
.msg
2792 del resp
.headers
['Content-encoding']
2794 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2795 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2796 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2797 resp
.msg
= old_resp
.msg
2798 del resp
.headers
['Content-encoding']
2799 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2800 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2801 if 300 <= resp
.code
< 400:
2802 location
= resp
.headers
.get('Location')
2804 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2805 if sys
.version_info
>= (3, 0):
2806 location
= location
.encode('iso-8859-1').decode('utf-8')
2808 location
= location
.decode('utf-8')
2809 location_escaped
= escape_url(location
)
2810 if location
!= location_escaped
:
2811 del resp
.headers
['Location']
2812 if sys
.version_info
< (3, 0):
2813 location_escaped
= location_escaped
.encode('utf-8')
2814 resp
.headers
['Location'] = location_escaped
2817 https_request
= http_request
2818 https_response
= http_response
2821 def make_socks_conn_class(base_class
, socks_proxy
):
2822 assert issubclass(base_class
, (
2823 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2825 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2826 if url_components
.scheme
.lower() == 'socks5':
2827 socks_type
= ProxyType
.SOCKS5
2828 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2829 socks_type
= ProxyType
.SOCKS4
2830 elif url_components
.scheme
.lower() == 'socks4a':
2831 socks_type
= ProxyType
.SOCKS4A
2833 def unquote_if_non_empty(s
):
2836 return compat_urllib_parse_unquote_plus(s
)
2840 url_components
.hostname
, url_components
.port
or 1080,
2842 unquote_if_non_empty(url_components
.username
),
2843 unquote_if_non_empty(url_components
.password
),
2846 class SocksConnection(base_class
):
2848 self
.sock
= sockssocket()
2849 self
.sock
.setproxy(*proxy_args
)
2850 if type(self
.timeout
) in (int, float):
2851 self
.sock
.settimeout(self
.timeout
)
2852 self
.sock
.connect((self
.host
, self
.port
))
2854 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2855 if hasattr(self
, '_context'): # Python > 2.6
2856 self
.sock
= self
._context
.wrap_socket(
2857 self
.sock
, server_hostname
=self
.host
)
2859 self
.sock
= ssl
.wrap_socket(self
.sock
)
2861 return SocksConnection
2864 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2865 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2866 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2867 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2868 self
._params
= params
2870 def https_open(self
, req
):
2872 conn_class
= self
._https
_conn
_class
2874 if hasattr(self
, '_context'): # python > 2.6
2875 kwargs
['context'] = self
._context
2876 if hasattr(self
, '_check_hostname'): # python 3.x
2877 kwargs
['check_hostname'] = self
._check
_hostname
2879 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2881 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2882 del req
.headers
['Ytdl-socks-proxy']
2884 return self
.do_open(functools
.partial(
2885 _create_http_connection
, self
, conn_class
, True),
2889 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2891 See [1] for cookie file format.
2893 1. https://curl.haxx.se/docs/http-cookies.html
2895 _HTTPONLY_PREFIX
= '#HttpOnly_'
2897 _HEADER
= '''# Netscape HTTP Cookie File
2898 # This file is generated by yt-dlp. Do not edit.
2901 _CookieFileEntry
= collections
.namedtuple(
2903 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2905 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2907 Save cookies to a file.
2909 Most of the code is taken from CPython 3.8 and slightly adapted
2910 to support cookie files with UTF-8 in both python 2 and 3.
2912 if filename
is None:
2913 if self
.filename
is not None:
2914 filename
= self
.filename
2916 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2918 # Store session cookies with `expires` set to 0 instead of an empty
2921 if cookie
.expires
is None:
2924 with io
.open(filename
, 'w', encoding
='utf-8') as f
:
2925 f
.write(self
._HEADER
)
2928 if not ignore_discard
and cookie
.discard
:
2930 if not ignore_expires
and cookie
.is_expired(now
):
2936 if cookie
.domain
.startswith('.'):
2937 initial_dot
= 'TRUE'
2939 initial_dot
= 'FALSE'
2940 if cookie
.expires
is not None:
2941 expires
= compat_str(cookie
.expires
)
2944 if cookie
.value
is None:
2945 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2946 # with no name, whereas http.cookiejar regards it as a
2947 # cookie with no value.
2952 value
= cookie
.value
2954 '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
,
2955 secure
, expires
, name
, value
]) + '\n')
2957 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2958 """Load cookies from a file."""
2959 if filename
is None:
2960 if self
.filename
is not None:
2961 filename
= self
.filename
2963 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2965 def prepare_line(line
):
2966 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2967 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2968 # comments and empty lines are fine
2969 if line
.startswith('#') or not line
.strip():
2971 cookie_list
= line
.split('\t')
2972 if len(cookie_list
) != self
._ENTRY
_LEN
:
2973 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
2974 cookie
= self
._CookieFileEntry
(*cookie_list
)
2975 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
2976 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
2980 with io
.open(filename
, encoding
='utf-8') as f
:
2983 cf
.write(prepare_line(line
))
2984 except compat_cookiejar
.LoadError
as e
:
2986 'WARNING: skipping cookie file entry due to %s: %r\n'
2987 % (e
, line
), sys
.stderr
)
2990 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2991 # Session cookies are denoted by either `expires` field set to
2992 # an empty string or 0. MozillaCookieJar only recognizes the former
2993 # (see [1]). So we need force the latter to be recognized as session
2994 # cookies on our own.
2995 # Session cookies may be important for cookies-based authentication,
2996 # e.g. usually, when user does not check 'Remember me' check box while
2997 # logging in on a site, some important cookies are stored as session
2998 # cookies so that not recognizing them will result in failed login.
2999 # 1. https://bugs.python.org/issue17164
3001 # Treat `expires=0` cookies as session cookies
3002 if cookie
.expires
== 0:
3003 cookie
.expires
= None
3004 cookie
.discard
= True
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also handles cookies for HTTPS traffic.

    The stock HTTPCookieProcessor only registers http_request/http_response
    hooks; this subclass aliases them to the https_* slots as well.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is intentionally disabled (kept for reference).
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS so cookies flow on both schemes.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3031 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
):
3032 """YoutubeDL redirect handler
3034 The code is based on HTTPRedirectHandler implementation from CPython [1].
3036 This redirect handler solves two issues:
3037 - ensures redirect URL is always unicode under python 2
3038 - introduces support for experimental HTTP response status code
3039 308 Permanent Redirect [2] used by some sites [3]
3041 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3042 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3043 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3046 http_error_301
= http_error_303
= http_error_307
= http_error_308
= compat_urllib_request
.HTTPRedirectHandler
.http_error_302
3048 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
3049 """Return a Request or None in response to a redirect.
3051 This is called by the http_error_30x methods when a
3052 redirection response is received. If a redirection should
3053 take place, return a new Request to allow http_error_30x to
3054 perform the redirect. Otherwise, raise HTTPError if no-one
3055 else should try to handle this url. Return None if you can't
3056 but another Handler might.
3058 m
= req
.get_method()
3059 if (not (code
in (301, 302, 303, 307, 308) and m
in ("GET", "HEAD")
3060 or code
in (301, 302, 303) and m
== "POST")):
3061 raise compat_HTTPError(req
.full_url
, code
, msg
, headers
, fp
)
3062 # Strictly (according to RFC 2616), 301 or 302 in response to
3063 # a POST MUST NOT cause a redirection without confirmation
3064 # from the user (of urllib.request, in this case). In practice,
3065 # essentially all clients do redirect in this case, so we do
3068 # On python 2 urlh.geturl() may sometimes return redirect URL
3069 # as byte string instead of unicode. This workaround allows
3070 # to force it always return unicode.
3071 if sys
.version_info
[0] < 3:
3072 newurl
= compat_str(newurl
)
3074 # Be conciliant with URIs containing a space. This is mainly
3075 # redundant with the more complete encoding done in http_error_302(),
3076 # but it is kept for compatibility with other callers.
3077 newurl
= newurl
.replace(' ', '%20')
3079 CONTENT_HEADERS
= ("content-length", "content-type")
3080 # NB: don't use dict comprehension for python 2.6 compatibility
3081 newheaders
= dict((k
, v
) for k
, v
in req
.headers
.items()
3082 if k
.lower() not in CONTENT_HEADERS
)
3083 return compat_urllib_request
.Request(
3084 newurl
, headers
=newheaders
, origin_req_host
=req
.origin_req_host
,
3088 def extract_timezone(date_str
):
3091 ^.{8,}? # >=8 char non-TZ prefix, if present
3092 (?P<tz>Z| # just the UTC Z, or
3093 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3094 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3095 [ ]? # optional space
3096 (?P<sign>\+|-) # +/-
3097 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3101 timezone
= datetime
.timedelta()
3103 date_str
= date_str
[:-len(m
.group('tz'))]
3104 if not m
.group('sign'):
3105 timezone
= datetime
.timedelta()
3107 sign
= 1 if m
.group('sign') == '+' else -1
3108 timezone
= datetime
.timedelta(
3109 hours
=sign
* int(m
.group('hours')),
3110 minutes
=sign
* int(m
.group('minutes')))
3111 return timezone
, date_str
3114 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
3115 """ Return a UNIX timestamp from the given date """
3117 if date_str
is None:
3120 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
3122 if timezone
is None:
3123 timezone
, date_str
= extract_timezone(date_str
)
3126 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
3127 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
3128 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Select the list of candidate date-format strings.

    day_first: True for day-first (DD/MM) locales, False for month-first (MM/DD).
    """
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3137 def unified_strdate(date_str
, day_first
=True):
3138 """Return a string with the date in the format YYYYMMDD"""
3140 if date_str
is None:
3144 date_str
= date_str
.replace(',', ' ')
3145 # Remove AM/PM + timezone
3146 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3147 _
, date_str
= extract_timezone(date_str
)
3149 for expression
in date_formats(day_first
):
3151 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
3154 if upload_date
is None:
3155 timetuple
= email
.utils
.parsedate_tz(date_str
)
3158 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
3161 if upload_date
is not None:
3162 return compat_str(upload_date
)
3165 def unified_timestamp(date_str
, day_first
=True):
3166 if date_str
is None:
3169 date_str
= re
.sub(r
'[,|]', '', date_str
)
3171 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
3172 timezone
, date_str
= extract_timezone(date_str
)
3174 # Remove AM/PM + timezone
3175 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3177 # Remove unrecognized timezones from ISO 8601 alike timestamps
3178 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
3180 date_str
= date_str
[:-len(m
.group('tz'))]
3182 # Python only supports microseconds, so remove nanoseconds
3183 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
3185 date_str
= m
.group(1)
3187 for expression
in date_formats(day_first
):
3189 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
3190 return calendar
.timegm(dt
.timetuple())
3193 timetuple
= email
.utils
.parsedate_tz(date_str
)
3195 return calendar
.timegm(timetuple
) + pm_delta
* 3600
3198 def determine_ext(url
, default_ext
='unknown_video'):
3199 if url
is None or '.' not in url
:
3201 guess
= url
.partition('?')[0].rpartition('.')[2]
3202 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
3204 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3205 elif guess
.rstrip('/') in KNOWN_EXTENSIONS
:
3206 return guess
.rstrip('/')
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle file name by swapping the media extension for '<lang>.<format>'."""
    sub_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, sub_ext, expected_real_ext)
3215 def datetime_from_str(date_str
, precision
='auto', format
='%Y%m%d'):
3217 Return a datetime object from a string in the format YYYYMMDD or
3218 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3220 format: string date format used to return datetime object from
3221 precision: round the time portion of a datetime object.
3222 auto|microsecond|second|minute|hour|day.
3223 auto: round to the unit provided in date_str (if applicable).
3225 auto_precision
= False
3226 if precision
== 'auto':
3227 auto_precision
= True
3228 precision
= 'microsecond'
3229 today
= datetime_round(datetime
.datetime
.now(), precision
)
3230 if date_str
in ('now', 'today'):
3232 if date_str
== 'yesterday':
3233 return today
- datetime
.timedelta(days
=1)
3235 r
'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3237 if match
is not None:
3238 start_time
= datetime_from_str(match
.group('start'), precision
, format
)
3239 time
= int(match
.group('time')) * (-1 if match
.group('sign') == '-' else 1)
3240 unit
= match
.group('unit')
3241 if unit
== 'month' or unit
== 'year':
3242 new_date
= datetime_add_months(start_time
, time
* 12 if unit
== 'year' else time
)
3248 delta
= datetime
.timedelta(**{unit + 's': time}
)
3249 new_date
= start_time
+ delta
3251 return datetime_round(new_date
, unit
)
3254 return datetime_round(datetime
.datetime
.strptime(date_str
, format
), precision
)
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    # Full microsecond precision here; only the date component is kept.
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day of month is clamped to the last valid day of the target month
    (e.g. Jan 31 + 1 month -> Feb 28/29), leap years included.
    """
    # Work in zero-based month arithmetic so // and % carry years cleanly
    # for both positive and negative offsets.
    total_months = dt.month - 1 + months
    new_year = dt.year + total_months // 12
    new_month = total_months % 12 + 1
    last_day = calendar.monthrange(new_year, new_month)[1]
    return dt.replace(new_year, new_month, min(dt.day, last_day))
3276 def datetime_round(dt
, precision
='day'):
3278 Round a datetime object's time to a specific precision
3280 if precision
== 'microsecond':
3289 roundto
= lambda x
, n
: ((x
+ n
/ 2) // n
) * n
3290 timestamp
= calendar
.timegm(dt
.timetuple())
3291 return datetime
.datetime
.utcfromtimestamp(roundto(timestamp
, unit_seconds
[precision
]))
3294 def hyphenate_date(date_str
):
3296 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3297 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
3298 if match
is not None:
3299 return '-'.join(match
.groups())
3304 class DateRange(object):
3305 """Represents a time interval between two dates"""
3307 def __init__(self
, start
=None, end
=None):
3308 """start and end must be strings in the format accepted by date"""
3309 if start
is not None:
3310 self
.start
= date_from_str(start
)
3312 self
.start
= datetime
.datetime
.min.date()
3314 self
.end
= date_from_str(end
)
3316 self
.end
= datetime
.datetime
.max.date()
3317 if self
.start
> self
.end
:
3318 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
3322 """Returns a range that only contains the given day"""
3323 return cls(day
, day
)
3325 def __contains__(self
, date
):
3326 """Check if the date is in the range"""
3327 if not isinstance(date
, datetime
.date
):
3328 date
= date_from_str(date
)
3329 return self
.start
<= date
<= self
.end
3332 return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat())
3335 def platform_name():
3336 """ Returns the platform name as a compat_str """
3337 res
= platform
.platform()
3338 if isinstance(res
, bytes):
3339 res
= res
.decode(preferredencoding())
3341 assert isinstance(res
, compat_str
)
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3353 def _windows_write_string(s
, out
):
3354 """ Returns True if the string was written using special methods,
3355 False if it has yet to be written out."""
3356 # Adapted from http://stackoverflow.com/a/3259271/35070
3359 import ctypes
.wintypes
3367 fileno
= out
.fileno()
3368 except AttributeError:
3369 # If the output stream doesn't have a fileno, it's virtual
3371 except io
.UnsupportedOperation
:
3372 # Some strange Windows pseudo files?
3374 if fileno
not in WIN_OUTPUT_IDS
:
3377 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3378 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3379 ('GetStdHandle', ctypes
.windll
.kernel32
))
3380 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3382 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3383 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3384 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3385 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3386 written
= ctypes
.wintypes
.DWORD(0)
3388 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3389 FILE_TYPE_CHAR
= 0x0002
3390 FILE_TYPE_REMOTE
= 0x8000
3391 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3392 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3393 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3394 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3395 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3397 def not_a_console(handle
):
3398 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3400 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3401 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3403 if not_a_console(h
):
3406 def next_nonbmp_pos(s
):
3408 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3409 except StopIteration:
3413 count
= min(next_nonbmp_pos(s
), 1024)
3415 ret
= WriteConsoleW(
3416 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3418 raise OSError('Failed to write string')
3419 if not count
: # We just wrote a non-BMP character
3420 assert written
.value
== 2
3423 assert written
.value
> 0
3424 s
= s
[written
.value
:]
3428 def write_string(s
, out
=None, encoding
=None):
3431 assert type(s
) == compat_str
3433 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3434 if _windows_write_string(s
, out
):
3437 if ('b' in getattr(out
, 'mode', '')
3438 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3439 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3441 elif hasattr(out
, 'buffer'):
3442 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3443 byt
= s
.encode(enc
, 'ignore')
3444 out
.buffer.write(byt
)
3450 def bytes_to_intlist(bs
):
3453 if isinstance(bs
[0], int): # Python 3
3456 return [ord(c
) for c
in bs
]
3459 def intlist_to_bytes(xs
):
3462 return compat_struct_pack('%dB' % len(xs
), *xs
)
3465 # Cross-platform file locking
3466 if sys
.platform
== 'win32':
3467 import ctypes
.wintypes
3470 class OVERLAPPED(ctypes
.Structure
):
3472 ('Internal', ctypes
.wintypes
.LPVOID
),
3473 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3474 ('Offset', ctypes
.wintypes
.DWORD
),
3475 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3476 ('hEvent', ctypes
.wintypes
.HANDLE
),
3479 kernel32
= ctypes
.windll
.kernel32
3480 LockFileEx
= kernel32
.LockFileEx
3481 LockFileEx
.argtypes
= [
3482 ctypes
.wintypes
.HANDLE
, # hFile
3483 ctypes
.wintypes
.DWORD
, # dwFlags
3484 ctypes
.wintypes
.DWORD
, # dwReserved
3485 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3486 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3487 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3489 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3490 UnlockFileEx
= kernel32
.UnlockFileEx
3491 UnlockFileEx
.argtypes
= [
3492 ctypes
.wintypes
.HANDLE
, # hFile
3493 ctypes
.wintypes
.DWORD
, # dwReserved
3494 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3495 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3496 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3498 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3499 whole_low
= 0xffffffff
3500 whole_high
= 0x7fffffff
3502 def _lock_file(f
, exclusive
):
3503 overlapped
= OVERLAPPED()
3504 overlapped
.Offset
= 0
3505 overlapped
.OffsetHigh
= 0
3506 overlapped
.hEvent
= 0
3507 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3508 handle
= msvcrt
.get_osfhandle(f
.fileno())
3509 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3510 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3511 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3513 def _unlock_file(f
):
3514 assert f
._lock
_file
_overlapped
_p
3515 handle
= msvcrt
.get_osfhandle(f
.fileno())
3516 if not UnlockFileEx(handle
, 0,
3517 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3518 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3521 # Some platforms, such as Jython, is missing fcntl
3525 def _lock_file(f
, exclusive
):
3526 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3528 def _unlock_file(f
):
3529 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3531 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3533 def _lock_file(f
, exclusive
):
3534 raise IOError(UNSUPPORTED_MSG
)
3536 def _unlock_file(f
):
3537 raise IOError(UNSUPPORTED_MSG
)
3540 class locked_file(object):
3541 def __init__(self
, filename
, mode
, encoding
=None):
3542 assert mode
in ['r', 'a', 'w']
3543 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3546 def __enter__(self
):
3547 exclusive
= self
.mode
!= 'r'
3549 _lock_file(self
.f
, exclusive
)
3555 def __exit__(self
, etype
, value
, traceback
):
3557 _unlock_file(self
.f
)
3564 def write(self
, *args
):
3565 return self
.f
.write(*args
)
3567 def read(self
, *args
):
3568 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), falling back to 'utf-8' when unset."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
3576 def shell_quote(args
):
3578 encoding
= get_filesystem_encoding()
3580 if isinstance(a
, bytes):
3581 # We may get a filename encoded with 'encodeFilename'
3582 a
= a
.decode(encoding
)
3583 quoted_args
.append(compat_shlex_quote(a
))
3584 return ' '.join(quoted_args
)
3587 def smuggle_url(url
, data
):
3588 """ Pass additional data in a URL for internal use. """
3590 url
, idata
= unsmuggle_url(url
, {})
3592 sdata
= compat_urllib_parse_urlencode(
3593 {'__youtubedl_smuggle': json.dumps(data)}
)
3594 return url
+ '#' + sdata
3597 def unsmuggle_url(smug_url
, default
=None):
3598 if '#__youtubedl_smuggle' not in smug_url
:
3599 return smug_url
, default
3600 url
, _
, sdata
= smug_url
.rpartition('#')
3601 jsond
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0]
3602 data
= json
.loads(jsond
)
3606 def format_bytes(bytes):
3609 if type(bytes) is str:
3610 bytes = float(bytes)
3614 exponent
= int(math
.log(bytes, 1024.0))
3615 suffix
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
]
3616 converted
= float(bytes) / float(1024 ** exponent
)
3617 return '%.2f%s' % (converted
, suffix
)
3620 def lookup_unit_table(unit_table
, s
):
3621 units_re
= '|'.join(re
.escape(u
) for u
in unit_table
)
3623 r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
, s
)
3626 num_str
= m
.group('num').replace(',', '.')
3627 mult
= unit_table
[m
.group('unit')]
3628 return int(float(num_str
) * mult
)
3631 def parse_filesize(s
):
3635 # The lower-case forms are of course incorrect and unofficial,
3636 # but we support those too
3653 'megabytes': 1000 ** 2,
3654 'mebibytes': 1024 ** 2,
3660 'gigabytes': 1000 ** 3,
3661 'gibibytes': 1024 ** 3,
3667 'terabytes': 1000 ** 4,
3668 'tebibytes': 1024 ** 4,
3674 'petabytes': 1000 ** 5,
3675 'pebibytes': 1024 ** 5,
3681 'exabytes': 1000 ** 6,
3682 'exbibytes': 1024 ** 6,
3688 'zettabytes': 1000 ** 7,
3689 'zebibytes': 1024 ** 7,
3695 'yottabytes': 1000 ** 8,
3696 'yobibytes': 1024 ** 8,
3699 return lookup_unit_table(_UNIT_TABLE
, s
)
3708 if re
.match(r
'^[\d,.]+$', s
):
3709 return str_to_int(s
)
3720 return lookup_unit_table(_UNIT_TABLE
, s
)
3723 def parse_resolution(s
):
3727 mobj
= re
.search(r
'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s
)
3730 'width': int(mobj
.group('w')),
3731 'height': int(mobj
.group('h')),
3734 mobj
= re
.search(r
'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s
)
3736 return {'height': int(mobj.group(1))}
3738 mobj
= re
.search(r
'\b([48])[kK]\b', s
)
3740 return {'height': int(mobj.group(1)) * 540}
3745 def parse_bitrate(s
):
3746 if not isinstance(s
, compat_str
):
3748 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3750 return int(mobj
.group(1))
3753 def month_by_name(name
, lang
='en'):
3754 """ Return the number of a month by (locale-independently) English name """
3756 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
3759 return month_names
.index(name
) + 1
3764 def month_by_abbreviation(abbrev
):
3765 """ Return the number of a month by (locale-independently) English
3769 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
3774 def fix_xml_ampersands(xml_str
):
3775 """Replace all the '&' by '&' in XML"""
3777 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3782 def setproctitle(title
):
3783 assert isinstance(title
, compat_str
)
3785 # ctypes in Jython is not complete
3786 # http://bugs.jython.org/issue2148
3787 if sys
.platform
.startswith('java'):
3791 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3795 # LoadLibrary in Windows Python 2.7.13 only expects
3796 # a bytestring, but since unicode_literals turns
3797 # every string into a unicode string, it fails.
3799 title_bytes
= title
.encode('utf-8')
3800 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3801 buf
.value
= title_bytes
3803 libc
.prctl(15, buf
, 0, 0, 0)
3804 except AttributeError:
3805 return # Strange libc, just skip this
def remove_start(s, start):
    """Return `s` with the prefix `start` stripped off when present.

    `None` and non-matching values are passed through unchanged.
    """
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Return `s` with the suffix `end` stripped off when present.

    `None` and non-matching values are passed through unchanged.
    """
    # Guard against an empty suffix: every string endswith(''), and the
    # slice s[:-len('')] == s[:0] would wrongly return '' instead of s.
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3816 def remove_quotes(s
):
3817 if s
is None or len(s
) < 2:
3819 for quote
in ('"', "'", ):
3820 if s
[0] == quote
and s
[-1] == quote
:
def get_domain(url):
    """Extract the host (scheme and a leading 'www.' removed) from `url`.

    Returns None when `url` does not contain anything domain-like
    (the host must include at least one dot).
    """
    mobj = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if mobj:
        return mobj.group('domain')
    return None
def url_basename(url):
    """Return the last path segment of `url` (query and fragment excluded)."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3836 return re
.match(r
'https?://[^?#&]+/', url
).group()
3839 def urljoin(base
, path
):
3840 if isinstance(path
, bytes):
3841 path
= path
.decode('utf-8')
3842 if not isinstance(path
, compat_str
) or not path
:
3844 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
3846 if isinstance(base
, bytes):
3847 base
= base
.decode('utf-8')
3848 if not isinstance(base
, compat_str
) or not re
.match(
3849 r
'^(?:https?:)?//', base
):
3851 return compat_urlparse
.urljoin(base
, path
)
3854 class HEADRequest(compat_urllib_request
.Request
):
3855 def get_method(self
):
3859 class PUTRequest(compat_urllib_request
.Request
):
3860 def get_method(self
):
3864 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
3867 v
= getattr(v
, get_attr
, None)
3873 return int(v
) * invscale
// scale
3874 except (ValueError, TypeError):
def str_or_none(v, default=None):
    """Coerce `v` to compat_str, returning `default` when `v` is None."""
    if v is None:
        return default
    return compat_str(v)
3882 def str_to_int(int_str
):
3883 """ A more relaxed version of int_or_none """
3884 if isinstance(int_str
, compat_integer_types
):
3886 elif isinstance(int_str
, compat_str
):
3887 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
3888 return int_or_none(int_str
)
3891 def float_or_none(v
, scale
=1, invscale
=1, default
=None):
3895 return float(v
) * invscale
/ scale
3896 except (ValueError, TypeError):
def bool_or_none(v, default=None):
    """Return `v` only when it is an actual bool; otherwise `default`."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return `v.strip()` when `v` is a compat_str; otherwise `default`."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3908 def url_or_none(url
):
3909 if not url
or not isinstance(url
, compat_str
):
3912 return url
if re
.match(r
'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url
) else None
3915 def strftime_or_none(timestamp
, date_format
, default
=None):
3916 datetime_object
= None
3918 if isinstance(timestamp
, compat_numeric_types
): # unix timestamp
3919 datetime_object
= datetime
.datetime
.utcfromtimestamp(timestamp
)
3920 elif isinstance(timestamp
, compat_str
): # assume YYYYMMDD
3921 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
3922 return datetime_object
.strftime(date_format
)
3923 except (ValueError, TypeError, AttributeError):
3927 def parse_duration(s
):
3928 if not isinstance(s
, compat_basestring
):
3933 days
, hours
, mins
, secs
, ms
= [None] * 5
3934 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3936 days
, hours
, mins
, secs
, ms
= m
.groups()
3941 [0-9]+\s*y(?:ears?)?\s*
3944 [0-9]+\s*m(?:onths?)?\s*
3947 [0-9]+\s*w(?:eeks?)?\s*
3950 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3954 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3957 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3960 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3963 days
, hours
, mins
, secs
, ms
= m
.groups()
3965 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3967 hours
, mins
= m
.groups()
3973 duration
+= float(secs
)
3975 duration
+= float(mins
) * 60
3977 duration
+= float(hours
) * 60 * 60
3979 duration
+= float(days
) * 24 * 60 * 60
3981 duration
+= float(ms
)
3985 def prepend_extension(filename
, ext
, expected_real_ext
=None):
3986 name
, real_ext
= os
.path
.splitext(filename
)
3988 '{0}.{1}{2}'.format(name
, ext
, real_ext
)
3989 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
3990 else '{0}.{1}'.format(filename
, ext
))
3993 def replace_extension(filename
, ext
, expected_real_ext
=None):
3994 name
, real_ext
= os
.path
.splitext(filename
)
3995 return '{0}.{1}'.format(
3996 name
if not expected_real_ext
or real_ext
[1:] == expected_real_ext
else filename
,
4000 def check_executable(exe
, args
=[]):
4001 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
4002 args can be a list of arguments for a short output (like -version) """
4004 Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate_or_kill()
4010 def get_exe_version(exe
, args
=['--version'],
4011 version_re
=None, unrecognized
='present'):
4012 """ Returns the version of the specified executable,
4013 or False if the executable is not present """
4015 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
4016 # SIGTTOU if yt-dlp is run in the background.
4017 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
4019 [encodeArgument(exe
)] + args
, stdin
=subprocess
.PIPE
,
4020 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate_or_kill()
4023 if isinstance(out
, bytes): # Python 2.x
4024 out
= out
.decode('ascii', 'ignore')
4025 return detect_exe_version(out
, version_re
, unrecognized
)
4028 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
4029 assert isinstance(output
, compat_str
)
4030 if version_re
is None:
4031 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
4032 m
= re
.search(version_re
, output
)
4039 class LazyList(collections
.abc
.Sequence
):
4040 ''' Lazy immutable list from an iterable
4041 Note that slices of a LazyList are lists and not LazyList'''
4043 class IndexError(IndexError):
4046 def __init__(self
, iterable
):
4047 self
.__iterable
= iter(iterable
)
4049 self
.__reversed
= False
4053 # We need to consume the entire iterable to iterate in reverse
4054 yield from self
.exhaust()
4056 yield from self
.__cache
4057 for item
in self
.__iterable
:
4058 self
.__cache
.append(item
)
4061 def __exhaust(self
):
4062 self
.__cache
.extend(self
.__iterable
)
4063 # Discard the emptied iterable to make it pickle-able
4064 self
.__iterable
= []
4068 ''' Evaluate the entire iterable '''
4069 return self
.__exhaust
()[::-1 if self
.__reversed
else 1]
4072 def __reverse_index(x
):
4073 return None if x
is None else -(x
+ 1)
4075 def __getitem__(self
, idx
):
4076 if isinstance(idx
, slice):
4078 idx
= slice(self
.__reverse
_index
(idx
.start
), self
.__reverse
_index
(idx
.stop
), -(idx
.step
or 1))
4079 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
4080 elif isinstance(idx
, int):
4082 idx
= self
.__reverse
_index
(idx
)
4083 start
, stop
, step
= idx
, idx
, 0
4085 raise TypeError('indices must be integers or slices')
4086 if ((start
or 0) < 0 or (stop
or 0) < 0
4087 or (start
is None and step
< 0)
4088 or (stop
is None and step
> 0)):
4089 # We need to consume the entire iterable to be able to slice from the end
4090 # Obviously, never use this with infinite iterables
4093 return self
.__cache
[idx
]
4094 except IndexError as e
:
4095 raise self
.IndexError(e
) from e
4096 n
= max(start
or 0, stop
or 0) - len(self
.__cache
) + 1
4098 self
.__cache
.extend(itertools
.islice(self
.__iterable
, n
))
4100 return self
.__cache
[idx
]
4101 except IndexError as e
:
4102 raise self
.IndexError(e
) from e
4106 self
[-1] if self
.__reversed
else self
[0]
4107 except self
.IndexError:
4113 return len(self
.__cache
)
4116 self
.__reversed
= not self
.__reversed
4120 # repr and str should mimic a list. So we exhaust the iterable
4121 return repr(self
.exhaust())
4124 return repr(self
.exhaust())
4129 # This is only useful for tests
4130 return len(self
.getslice())
4132 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
4133 self
._pagefunc
= pagefunc
4134 self
._pagesize
= pagesize
4135 self
._use
_cache
= use_cache
4138 def getpage(self
, pagenum
):
4139 page_results
= self
._cache
.get(pagenum
) or list(self
._pagefunc
(pagenum
))
4141 self
._cache
[pagenum
] = page_results
4144 def getslice(self
, start
=0, end
=None):
4145 return list(self
._getslice
(start
, end
))
4147 def _getslice(self
, start
, end
):
4148 raise NotImplementedError('This method must be implemented by subclasses')
4150 def __getitem__(self
, idx
):
4151 # NOTE: cache must be enabled if this is used
4152 if not isinstance(idx
, int) or idx
< 0:
4153 raise TypeError('indices must be non-negative integers')
4154 entries
= self
.getslice(idx
, idx
+ 1)
4155 return entries
[0] if entries
else None
4158 class OnDemandPagedList(PagedList
):
4159 def _getslice(self
, start
, end
):
4160 for pagenum
in itertools
.count(start
// self
._pagesize
):
4161 firstid
= pagenum
* self
._pagesize
4162 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
4163 if start
>= nextfirstid
:
4167 start
% self
._pagesize
4168 if firstid
<= start
< nextfirstid
4171 ((end
- 1) % self
._pagesize
) + 1
4172 if (end
is not None and firstid
<= end
<= nextfirstid
)
4175 page_results
= self
.getpage(pagenum
)
4176 if startv
!= 0 or endv
is not None:
4177 page_results
= page_results
[startv
:endv
]
4178 yield from page_results
4180 # A little optimization - if current page is not "full", ie. does
4181 # not contain page_size videos then we can assume that this page
4182 # is the last one - there are no more ids on further pages -
4183 # i.e. no need to query again.
4184 if len(page_results
) + startv
< self
._pagesize
:
4187 # If we got the whole page, but the next page is not interesting,
4188 # break out early as well
4189 if end
== nextfirstid
:
4193 class InAdvancePagedList(PagedList
):
4194 def __init__(self
, pagefunc
, pagecount
, pagesize
):
4195 self
._pagecount
= pagecount
4196 PagedList
.__init
__(self
, pagefunc
, pagesize
, True)
4198 def _getslice(self
, start
, end
):
4199 start_page
= start
// self
._pagesize
4201 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
4202 skip_elems
= start
- start_page
* self
._pagesize
4203 only_more
= None if end
is None else end
- start
4204 for pagenum
in range(start_page
, end_page
):
4205 page_results
= self
.getpage(pagenum
)
4207 page_results
= page_results
[skip_elems
:]
4209 if only_more
is not None:
4210 if len(page_results
) < only_more
:
4211 only_more
-= len(page_results
)
4213 yield from page_results
[:only_more
]
4215 yield from page_results
4218 def uppercase_escape(s
):
4219 unicode_escape
= codecs
.getdecoder('unicode_escape')
4221 r
'\\U[0-9a-fA-F]{8}',
4222 lambda m
: unicode_escape(m
.group(0))[0],
4226 def lowercase_escape(s
):
4227 unicode_escape
= codecs
.getdecoder('unicode_escape')
4229 r
'\\u[0-9a-fA-F]{4}',
4230 lambda m
: unicode_escape(m
.group(0))[0],
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a unicode string must be UTF-8 encoded before quoting.
    needs_encode = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encode:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4241 def escape_url(url
):
4242 """Escape URL as suggested by RFC 3986"""
4243 url_parsed
= compat_urllib_parse_urlparse(url
)
4244 return url_parsed
._replace
(
4245 netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'),
4246 path
=escape_rfc3986(url_parsed
.path
),
4247 params
=escape_rfc3986(url_parsed
.params
),
4248 query
=escape_rfc3986(url_parsed
.query
),
4249 fragment
=escape_rfc3986(url_parsed
.fragment
)
4254 return compat_parse_qs(compat_urllib_parse_urlparse(url
).query
)
4257 def read_batch_urls(batch_fd
):
4259 if not isinstance(url
, compat_str
):
4260 url
= url
.decode('utf-8', 'replace')
4261 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
4262 for bom
in BOM_UTF8
:
4263 if url
.startswith(bom
):
4264 url
= url
[len(bom
):]
4266 if not url
or url
.startswith(('#', ';', ']')):
4268 # "#" cannot be stripped out since it is part of the URI
4269 # However, it can be safely stripped out if following a whitespace
4270 return re
.split(r
'\s#', url
, 1)[0].rstrip()
4272 with contextlib
.closing(batch_fd
) as fd
:
4273 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes for a POST body."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4280 def update_url_query(url
, query
):
4283 parsed_url
= compat_urlparse
.urlparse(url
)
4284 qs
= compat_parse_qs(parsed_url
.query
)
4286 return compat_urlparse
.urlunparse(parsed_url
._replace
(
4287 query
=compat_urllib_parse_urlencode(qs
, True)))
4290 def update_Request(req
, url
=None, data
=None, headers
={}, query={}
):
4291 req_headers
= req
.headers
.copy()
4292 req_headers
.update(headers
)
4293 req_data
= data
or req
.data
4294 req_url
= update_url_query(url
or req
.get_full_url(), query
)
4295 req_get_method
= req
.get_method()
4296 if req_get_method
== 'HEAD':
4297 req_type
= HEADRequest
4298 elif req_get_method
== 'PUT':
4299 req_type
= PUTRequest
4301 req_type
= compat_urllib_request
.Request
4303 req_url
, data
=req_data
, headers
=req_headers
,
4304 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
4305 if hasattr(req
, 'timeout'):
4306 new_req
.timeout
= req
.timeout
4310 def _multipart_encode_impl(data
, boundary
):
4311 content_type
= 'multipart/form-data; boundary=%s' % boundary
4314 for k
, v
in data
.items():
4315 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
4316 if isinstance(k
, compat_str
):
4317 k
= k
.encode('utf-8')
4318 if isinstance(v
, compat_str
):
4319 v
= v
.encode('utf-8')
4320 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4321 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4322 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
4323 if boundary
.encode('ascii') in content
:
4324 raise ValueError('Boundary overlaps with data')
4327 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
4329 return out
, content_type
4332 def multipart_encode(data
, boundary
=None):
4334 Encode a dict to RFC 7578-compliant form-data
4337 A dict where keys and values can be either Unicode or bytes-like
4340 If specified a Unicode object, it's used as the boundary. Otherwise
4341 a random boundary is generated.
4343 Reference: https://tools.ietf.org/html/rfc7578
4345 has_specified_boundary
= boundary
is not None
4348 if boundary
is None:
4349 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
4352 out
, content_type
= _multipart_encode_impl(data
, boundary
)
4355 if has_specified_boundary
:
4359 return out
, content_type
4362 def dict_get(d
, key_or_keys
, default
=None, skip_false_values
=True):
4363 if isinstance(key_or_keys
, (list, tuple)):
4364 for key
in key_or_keys
:
4365 if key
not in d
or d
[key
] is None or skip_false_values
and not d
[key
]:
4369 return d
.get(key_or_keys
, default
)
4372 def try_get(src
, getter
, expected_type
=None):
4373 for get
in variadic(getter
):
4376 except (AttributeError, KeyError, TypeError, IndexError):
4379 if expected_type
is None or isinstance(v
, expected_type
):
4383 def merge_dicts(*dicts
):
4385 for a_dict
in dicts
:
4386 for k
, v
in a_dict
.items():
4390 or (isinstance(v
, compat_str
) and v
4391 and isinstance(merged
[k
], compat_str
)
4392 and not merged
[k
])):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode `string` to compat_str unless it already is one.

    NOTE: `encoding` defaults to preferredencoding() evaluated once at
    definition time (original behavior, kept as-is).
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4410 TV_PARENTAL_GUIDELINES
= {
4420 def parse_age_limit(s
):
4422 return s
if 0 <= s
<= 21 else None
4423 if not isinstance(s
, compat_basestring
):
4425 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
4427 return int(m
.group('age'))
4430 return US_RATINGS
[s
]
4431 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
4433 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
4437 def strip_jsonp(code
):
4440 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4441 (?:\s*&&\s*(?P=func_name))?
4442 \s*\(\s*(?P<callback_data>.*)\);?
4443 \s*?(?://[^\n]*)*$''',
4444 r
'\g<callback_data>', code
)
4447 def js_to_json(code
, vars={}):
4448 # vars is a dict of var, val pairs to substitute
4449 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4450 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
4452 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
4453 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
4458 if v
in ('true', 'false', 'null'):
4460 elif v
in ('undefined', 'void 0'):
4462 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
4465 if v
[0] in ("'", '"'):
4466 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
4471 }.get(m
.group(0), m
.group(0)), v
[1:-1])
4473 for regex
, base
in INTEGER_TABLE
:
4474 im
= re
.match(regex
, v
)
4476 i
= int(im
.group(1), base
)
4477 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4484 return re
.sub(r
'''(?sx)
4485 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4486 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4487 {comment}|,(?={skip}[\]}}])|
4488 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4489 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4492 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
4495 def qualities(quality_ids
):
4496 """ Get a numeric quality value out of a list of possible values """
4499 return quality_ids
.index(qid
)
4506 'default': '%(title)s [%(id)s].%(ext)s',
4507 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4513 'description': 'description',
4514 'annotation': 'annotations.xml',
4515 'infojson': 'info.json',
4517 'pl_thumbnail': None,
4518 'pl_description': 'description',
4519 'pl_infojson': 'info.json',
4522 # As of [1] format syntax is:
4523 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4524 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4525 STR_FORMAT_RE_TMPL
= r
'''(?x)
4526 (?<!%)(?P<prefix>(?:%%)*)
4528 (?P<has_key>\((?P<key>{0})\))?
4530 (?P<conversion>[#0\-+ ]+)?
4532 (?P<precision>\.\d+)?
4533 (?P<len_mod>[hlL])? # unused in python
4534 {1} # conversion type
4539 STR_FORMAT_TYPES
= 'diouxXeEfFgGcrs'
4542 def limit_length(s
, length
):
4543 """ Add ellipses to overly long strings """
4548 return s
[:length
- len(ELLIPSES
)] + ELLIPSES
def version_tuple(v):
    """Split a version string on '.' or '-' into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
4556 def is_outdated_version(version
, limit
, assume_new
=True):
4558 return not assume_new
4560 return version_tuple(version
) < version_tuple(limit
)
4562 return not assume_new
4565 def ytdl_is_updateable():
4566 """ Returns if yt-dlp can be updated with -U """
4568 from .update
import is_non_updateable
4570 return not is_non_updateable()
def args_to_str(args):
    """Get a short, shell-quoted string representation of a subprocess
    command (for display purposes).
    """
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
4578 def error_to_compat_str(err
):
4580 # On python 2 error byte string must be decoded with proper
4581 # encoding rather than ascii
4582 if sys
.version_info
[0] < 3:
4583 err_str
= err_str
.decode(preferredencoding())
4587 def mimetype2ext(mt
):
4591 mt
, _
, params
= mt
.partition(';')
4596 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4597 # it's the most popular one
4598 'audio/mpeg': 'mp3',
4599 'audio/x-wav': 'wav',
4601 'audio/wave': 'wav',
4604 ext
= FULL_MAP
.get(mt
)
4610 'smptett+xml': 'tt',
4614 'x-mp4-fragmented': 'mp4',
4615 'x-ms-sami': 'sami',
4618 'x-mpegurl': 'm3u8',
4619 'vnd.apple.mpegurl': 'm3u8',
4623 'vnd.ms-sstr+xml': 'ism',
4627 'filmstrip+json': 'fs',
4631 _
, _
, subtype
= mt
.rpartition('/')
4632 ext
= SUBTYPE_MAP
.get(subtype
.lower())
4643 _
, _
, suffix
= subtype
.partition('+')
4644 ext
= SUFFIX_MAP
.get(suffix
)
4648 return subtype
.replace('+', '.')
4651 def parse_codecs(codecs_str
):
4652 # http://tools.ietf.org/html/rfc6381
4655 split_codecs
= list(filter(None, map(
4656 str.strip
, codecs_str
.strip().strip(',').split(','))))
4657 vcodec
, acodec
, hdr
= None, None, None
4658 for full_codec
in split_codecs
:
4659 codec
= full_codec
.split('.')[0]
4660 if codec
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'):
4663 if codec
in ('dvh1', 'dvhe'):
4665 elif codec
== 'vp9' and vcodec
.startswith('vp9.2'):
4667 elif codec
== 'av01':
4668 parts
= full_codec
.split('.')
4669 if len(parts
) > 3 and parts
[3] == '10':
4671 vcodec
= '.'.join(parts
[:4])
4672 elif codec
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4676 write_string('WARNING: Unknown codec %s\n' % full_codec
, sys
.stderr
)
4677 if not vcodec
and not acodec
:
4678 if len(split_codecs
) == 2:
4680 'vcodec': split_codecs
[0],
4681 'acodec': split_codecs
[1],
4685 'vcodec': vcodec
or 'none',
4686 'acodec': acodec
or 'none',
4687 'dynamic_range': hdr
,
4692 def urlhandle_detect_ext(url_handle
):
4693 getheader
= url_handle
.headers
.get
4695 cd
= getheader('Content-Disposition')
4697 m
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
)
4699 e
= determine_ext(m
.group('filename'), default_ext
=None)
4703 return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 'data:' URI with a base64-encoded payload.

    `data` is a bytes-like object; `mime_type` is used verbatim.
    """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    # Block when the viewer's limit is below the content's requirement.
    return age_limit < content_limit
4720 def is_html(first_bytes
):
4721 """ Detect whether a file contains HTML by examining its first bytes. """
4724 (b
'\xef\xbb\xbf', 'utf-8'),
4725 (b
'\x00\x00\xfe\xff', 'utf-32-be'),
4726 (b
'\xff\xfe\x00\x00', 'utf-32-le'),
4727 (b
'\xff\xfe', 'utf-16-le'),
4728 (b
'\xfe\xff', 'utf-16-be'),
4730 for bom
, enc
in BOMS
:
4731 if first_bytes
.startswith(bom
):
4732 s
= first_bytes
[len(bom
):].decode(enc
, 'replace')
4735 s
= first_bytes
.decode('utf-8', 'replace')
4737 return re
.match(r
'^\s*<', s
)
4740 def determine_protocol(info_dict
):
4741 protocol
= info_dict
.get('protocol')
4742 if protocol
is not None:
4745 url
= sanitize_url(info_dict
['url'])
4746 if url
.startswith('rtmp'):
4748 elif url
.startswith('mms'):
4750 elif url
.startswith('rtsp'):
4753 ext
= determine_ext(url
)
4759 return compat_urllib_parse_urlparse(url
).scheme
4762 def render_table(header_row
, data
, delim
=False, extraGap
=0, hideEmpty
=False):
4763 """ Render a list of rows, each as a list of values """
4765 return len(remove_terminal_sequences(string
))
4767 def get_max_lens(table
):
4768 return [max(width(str(v
)) for v
in col
) for col
in zip(*table
)]
4770 def filter_using_list(row
, filterArray
):
4771 return [col
for (take
, col
) in zip(filterArray
, row
) if take
]
4774 max_lens
= get_max_lens(data
)
4775 header_row
= filter_using_list(header_row
, max_lens
)
4776 data
= [filter_using_list(row
, max_lens
) for row
in data
]
4778 table
= [header_row
] + data
4779 max_lens
= get_max_lens(table
)
4782 table
= [header_row
] + [[delim
* (ml
+ extraGap
) for ml
in max_lens
]] + data
4785 for pos
, text
in enumerate(map(str, row
)):
4786 row
[pos
] = text
+ (' ' * (max_lens
[pos
] - width(text
) + extraGap
))
4787 ret
= '\n'.join(''.join(row
) for row
in table
)
4791 def _match_one(filter_part
, dct
, incomplete
):
4792 # TODO: Generalize code with YoutubeDL._build_format_filter
4793 STRING_OPERATORS
= {
4794 '*=': operator
.contains
,
4795 '^=': lambda attr
, value
: attr
.startswith(value
),
4796 '$=': lambda attr
, value
: attr
.endswith(value
),
4797 '~=': lambda attr
, value
: re
.search(value
, attr
),
4799 COMPARISON_OPERATORS
= {
4801 '<=': operator
.le
, # "<=" must be defined above "<"
4808 operator_rex
= re
.compile(r
'''(?x)\s*
4810 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4812 (?P<quote>["\'])(?P
<quotedstrval
>.+?
)(?P
=quote
)|
4816 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4817 m = operator_rex.search(filter_part)
4820 unnegated_op = COMPARISON_OPERATORS[m['op']]
4822 op = lambda attr, value: not unnegated_op(attr, value)
4825 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4827 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4828 actual_value = dct.get(m['key'])
4829 numeric_comparison = None
4830 if isinstance(actual_value, compat_numeric_types):
4831 # If the original field is a string and matching comparisonvalue is
4832 # a number we should respect the origin of the original field
4833 # and process comparison value as a string (see
4834 # https://github.com/ytdl-org/youtube-dl/issues/11082)
4836 numeric_comparison = int(comparison_value)
4838 numeric_comparison = parse_filesize(comparison_value)
4839 if numeric_comparison is None:
4840 numeric_comparison = parse_filesize(f'{comparison_value}B')
4841 if numeric_comparison is None:
4842 numeric_comparison = parse_duration(comparison_value)
4843 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4844 raise ValueError('Operator %s only supports string values!' % m['op'])
4845 if actual_value is None:
4846 return incomplete or m['none_inclusive']
4847 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
4850 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4851 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4853 operator_rex = re.compile(r'''(?x
)\s
*
4854 (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+)
4856 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4857 m = operator_rex.search(filter_part)
4859 op = UNARY_OPERATORS[m.group('op')]
4860 actual_value = dct.get(m.group('key'))
4861 if incomplete and actual_value is None:
4863 return op(actual_value)
4865 raise ValueError('Invalid filter part %r' % filter_part)
4868 def match_str(filter_str, dct, incomplete=False):
4869 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4870 When incomplete, all conditions passes on missing fields
4873 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
4874 for filter_part in re.split(r'(?<!\\)&', filter_str))
4877 def match_filter_func(filter_str):
4878 def _match_func(info_dict, *args, **kwargs):
4879 if match_str(filter_str, info_dict, *args, **kwargs):
4882 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4883 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4887 def parse_dfxp_time_expr(time_expr):
4891 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4893 return float(mobj.group('time_offset'))
4895 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4897 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode (HH:MM:SS,mmm)."""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
def ass_subtitles_timecode(seconds):
    """Format a duration in seconds as an ASS timecode (H:MM:SS.cc)."""
    tt = timetuple_from_msec(seconds * 1000)
    centiseconds = tt.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*tt[:-1], centiseconds)
4909 def dfxp2srt(dfxp_data):
4911 @param dfxp_data A
bytes-like
object containing DFXP data
4912 @returns A
unicode object containing converted SRT data
4914 LEGACY_NAMESPACES = (
4915 (b'http://www.w3.org/ns/ttml', [
4916 b'http://www.w3.org/2004/11/ttaf1',
4917 b'http://www.w3.org/2006/04/ttaf1',
4918 b'http://www.w3.org/2006/10/ttaf1',
4920 (b'http://www.w3.org/ns/ttml#styling', [
4921 b'http://www.w3.org/ns/ttml#style',
4925 SUPPORTED_STYLING = [
4934 _x = functools.partial(xpath_with_ns, ns_map={
4935 'xml': 'http://www.w3.org/XML/1998/namespace',
4936 'ttml': 'http://www.w3.org/ns/ttml',
4937 'tts': 'http://www.w3.org/ns/ttml#styling',
4943 class TTMLPElementParser(object):
4945 _unclosed_elements = []
4946 _applied_styles = []
4948 def start(self, tag, attrib):
4949 if tag in (_x('ttml:br'), 'br'):
4952 unclosed_elements = []
4954 element_style_id = attrib.get('style')
4956 style.update(default_style)
4957 if element_style_id:
4958 style.update(styles.get(element_style_id, {}))
4959 for prop in SUPPORTED_STYLING:
4960 prop_val = attrib.get(_x('tts:' + prop))
4962 style[prop] = prop_val
4965 for k, v in sorted(style.items()):
4966 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4969 font += ' color="%s"' % v
4970 elif k == 'fontSize':
4971 font += ' size="%s"' % v
4972 elif k == 'fontFamily':
4973 font += ' face="%s"' % v
4974 elif k == 'fontWeight' and v == 'bold':
4976 unclosed_elements.append('b')
4977 elif k == 'fontStyle' and v == 'italic':
4979 unclosed_elements.append('i')
4980 elif k == 'textDecoration' and v == 'underline':
4982 unclosed_elements.append('u')
4984 self._out += '<font' + font + '>'
4985 unclosed_elements.append('font')
4987 if self._applied_styles:
4988 applied_style.update(self._applied_styles[-1])
4989 applied_style.update(style)
4990 self._applied_styles.append(applied_style)
4991 self._unclosed_elements.append(unclosed_elements)
4994 if tag not in (_x('ttml:br'), 'br'):
4995 unclosed_elements = self._unclosed_elements.pop()
4996 for element in reversed(unclosed_elements):
4997 self._out += '</%s>' % element
4998 if unclosed_elements and self._applied_styles:
4999 self._applied_styles.pop()
5001 def data(self, data):
5005 return self._out.strip()
5007 def parse_node(node):
5008 target = TTMLPElementParser()
5009 parser = xml.etree.ElementTree.XMLParser(target=target)
5010 parser.feed(xml.etree.ElementTree.tostring(node))
5011 return parser.close()
5013 for k, v in LEGACY_NAMESPACES:
5015 dfxp_data = dfxp_data.replace(ns, k)
5017 dfxp = compat_etree_fromstring(dfxp_data)
5019 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
5022 raise ValueError('Invalid dfxp/TTML subtitle')
5026 for style in dfxp.findall(_x('.//ttml:style')):
5027 style_id = style.get('id') or style.get(_x('xml:id'))
5030 parent_style_id = style.get('style')
5032 if parent_style_id not in styles:
5035 styles[style_id] = styles[parent_style_id].copy()
5036 for prop in SUPPORTED_STYLING:
5037 prop_val = style.get(_x('tts:' + prop))
5039 styles.setdefault(style_id, {})[prop] = prop_val
5045 for p in ('body', 'div'):
5046 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5049 style = styles.get(ele.get('style'))
5052 default_style.update(style)
5054 for para, index in zip(paras, itertools.count(1)):
5055 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
5056 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
5057 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5058 if begin_time is None:
5063 end_time = begin_time + dur
5064 out.append('%d\n%s --> %s\n%s\n\n' % (
5066 srt_subtitles_timecode(begin_time),
5067 srt_subtitles_timecode(end_time),
5073 def cli_option(params, command_option, param):
5074 param = params.get(param)
5076 param = compat_str(param)
5077 return [command_option, param] if param is not None else []
5080 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5081 param = params.get(param)
5084 assert isinstance(param, bool)
5086 return [command_option + separator + (true_value if param else false_value)]
5087 return [command_option, true_value if param else false_value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value,
    otherwise an empty list.
    """
    value = params.get(param)
    if value == expected_value:
        return [command_option]
    return []
5095 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
5096 if isinstance(argdict, (list, tuple)): # for backward compatibility
5103 assert isinstance(argdict, dict)
5105 assert isinstance(keys, (list, tuple))
5106 for key_list in keys:
5107 arg_list = list(filter(
5108 lambda x: x is not None,
5109 [argdict.get(key.lower()) for key in variadic(key_list)]))
5111 return [arg for args in arg_list for arg in args]
5115 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5116 main_key, exe = main_key.lower(), exe.lower()
5117 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5118 keys = [f'{root_key}{k}' for k in (keys or [''])]
5119 if root_key in keys:
5121 keys.append((main_key, exe))
5122 keys.append('default')
5125 return cli_configuration_args(argdict, keys, default, use_compat)
5128 class ISO639Utils(object):
5129 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5188 'iw': 'heb', # Replaced by he in 1989 revision
5198 'in': 'ind', # Replaced by id in 1989 revision
5313 'ji': 'yid', # Replaced by yi in 1989 revision
5321 def short2long(cls, code):
5322 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5323 return cls._lang_map.get(code[:2])
5326 def long2short(cls, code):
5327 """Convert language code from ISO 639-2/T to ISO 639-1"""
5328 for short_name, long_name in cls._lang_map.items():
5329 if long_name == code:
5333 class ISO3166Utils(object):
5334 # From http://data.okfn.org/data/core/country-list
5336 'AF': 'Afghanistan',
5337 'AX': 'Åland Islands',
5340 'AS': 'American Samoa',
5345 'AG': 'Antigua and Barbuda',
5362 'BO': 'Bolivia, Plurinational State of',
5363 'BQ': 'Bonaire, Sint Eustatius and Saba',
5364 'BA': 'Bosnia and Herzegovina',
5366 'BV': 'Bouvet Island',
5368 'IO': 'British Indian Ocean Territory',
5369 'BN': 'Brunei Darussalam',
5371 'BF': 'Burkina Faso',
5377 'KY': 'Cayman Islands',
5378 'CF': 'Central African Republic',
5382 'CX': 'Christmas Island',
5383 'CC': 'Cocos (Keeling) Islands',
5387 'CD': 'Congo, the Democratic Republic of the',
5388 'CK': 'Cook Islands',
5390 'CI': 'Côte d\'Ivoire',
5395 'CZ': 'Czech Republic',
5399 'DO': 'Dominican Republic',
5402 'SV': 'El Salvador',
5403 'GQ': 'Equatorial Guinea',
5407 'FK': 'Falkland Islands (Malvinas)',
5408 'FO': 'Faroe Islands',
5412 'GF': 'French Guiana',
5413 'PF': 'French Polynesia',
5414 'TF': 'French Southern Territories',
5429 'GW': 'Guinea-Bissau',
5432 'HM': 'Heard Island and McDonald Islands',
5433 'VA': 'Holy See (Vatican City State)',
5440 'IR': 'Iran, Islamic Republic of',
5443 'IM': 'Isle of Man',
5453 'KP': 'Korea, Democratic People\'s Republic of',
5454 'KR': 'Korea, Republic of',
5457 'LA': 'Lao People\'s Democratic Republic',
5463 'LI': 'Liechtenstein',
5467 'MK': 'Macedonia, the Former Yugoslav Republic of',
5474 'MH': 'Marshall Islands',
5480 'FM': 'Micronesia, Federated States of',
5481 'MD': 'Moldova, Republic of',
5492 'NL': 'Netherlands',
5493 'NC': 'New Caledonia',
5494 'NZ': 'New Zealand',
5499 'NF': 'Norfolk Island',
5500 'MP': 'Northern Mariana Islands',
5505 'PS': 'Palestine, State of',
5507 'PG': 'Papua New Guinea',
5510 'PH': 'Philippines',
5514 'PR': 'Puerto Rico',
5518 'RU': 'Russian Federation',
5520 'BL': 'Saint Barthélemy',
5521 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5522 'KN': 'Saint Kitts and Nevis',
5523 'LC': 'Saint Lucia',
5524 'MF': 'Saint Martin (French part)',
5525 'PM': 'Saint Pierre and Miquelon',
5526 'VC': 'Saint Vincent and the Grenadines',
5529 'ST': 'Sao Tome and Principe',
5530 'SA': 'Saudi Arabia',
5534 'SL': 'Sierra Leone',
5536 'SX': 'Sint Maarten (Dutch part)',
5539 'SB': 'Solomon Islands',
5541 'ZA': 'South Africa',
5542 'GS': 'South Georgia and the South Sandwich Islands',
5543 'SS': 'South Sudan',
5548 'SJ': 'Svalbard and Jan Mayen',
5551 'CH': 'Switzerland',
5552 'SY': 'Syrian Arab Republic',
5553 'TW': 'Taiwan, Province of China',
5555 'TZ': 'Tanzania, United Republic of',
5557 'TL': 'Timor-Leste',
5561 'TT': 'Trinidad and Tobago',
5564 'TM': 'Turkmenistan',
5565 'TC': 'Turks and Caicos Islands',
5569 'AE': 'United Arab Emirates',
5570 'GB': 'United Kingdom',
5571 'US': 'United States',
5572 'UM': 'United States Minor Outlying Islands',
5576 'VE': 'Venezuela, Bolivarian Republic of',
5578 'VG': 'Virgin Islands, British',
5579 'VI': 'Virgin Islands, U.S.',
5580 'WF': 'Wallis and Futuna',
5581 'EH': 'Western Sahara',
5588 def short2full(cls, code):
5589 """Convert an ISO 3166-2 country code to the corresponding full name"""
5590 return cls._country_map.get(code.upper())
5593 class GeoUtils(object):
5594 # Major IPv4 address blocks per country
5596 'AD': '46.172.224.0/19',
5597 'AE': '94.200.0.0/13',
5598 'AF': '149.54.0.0/17',
5599 'AG': '209.59.64.0/18',
5600 'AI': '204.14.248.0/21',
5601 'AL': '46.99.0.0/16',
5602 'AM': '46.70.0.0/15',
5603 'AO': '105.168.0.0/13',
5604 'AP': '182.50.184.0/21',
5605 'AQ': '23.154.160.0/24',
5606 'AR': '181.0.0.0/12',
5607 'AS': '202.70.112.0/20',
5608 'AT': '77.116.0.0/14',
5609 'AU': '1.128.0.0/11',
5610 'AW': '181.41.0.0/18',
5611 'AX': '185.217.4.0/22',
5612 'AZ': '5.197.0.0/16',
5613 'BA': '31.176.128.0/17',
5614 'BB': '65.48.128.0/17',
5615 'BD': '114.130.0.0/16',
5617 'BF': '102.178.0.0/15',
5618 'BG': '95.42.0.0/15',
5619 'BH': '37.131.0.0/17',
5620 'BI': '154.117.192.0/18',
5621 'BJ': '137.255.0.0/16',
5622 'BL': '185.212.72.0/23',
5623 'BM': '196.12.64.0/18',
5624 'BN': '156.31.0.0/16',
5625 'BO': '161.56.0.0/16',
5626 'BQ': '161.0.80.0/20',
5627 'BR': '191.128.0.0/12',
5628 'BS': '24.51.64.0/18',
5629 'BT': '119.2.96.0/19',
5630 'BW': '168.167.0.0/16',
5631 'BY': '178.120.0.0/13',
5632 'BZ': '179.42.192.0/18',
5633 'CA': '99.224.0.0/11',
5634 'CD': '41.243.0.0/16',
5635 'CF': '197.242.176.0/21',
5636 'CG': '160.113.0.0/16',
5637 'CH': '85.0.0.0/13',
5638 'CI': '102.136.0.0/14',
5639 'CK': '202.65.32.0/19',
5640 'CL': '152.172.0.0/14',
5641 'CM': '102.244.0.0/14',
5642 'CN': '36.128.0.0/10',
5643 'CO': '181.240.0.0/12',
5644 'CR': '201.192.0.0/12',
5645 'CU': '152.206.0.0/15',
5646 'CV': '165.90.96.0/19',
5647 'CW': '190.88.128.0/17',
5648 'CY': '31.153.0.0/16',
5649 'CZ': '88.100.0.0/14',
5651 'DJ': '197.241.0.0/17',
5652 'DK': '87.48.0.0/12',
5653 'DM': '192.243.48.0/20',
5654 'DO': '152.166.0.0/15',
5655 'DZ': '41.96.0.0/12',
5656 'EC': '186.68.0.0/15',
5657 'EE': '90.190.0.0/15',
5658 'EG': '156.160.0.0/11',
5659 'ER': '196.200.96.0/20',
5660 'ES': '88.0.0.0/11',
5661 'ET': '196.188.0.0/14',
5662 'EU': '2.16.0.0/13',
5663 'FI': '91.152.0.0/13',
5664 'FJ': '144.120.0.0/16',
5665 'FK': '80.73.208.0/21',
5666 'FM': '119.252.112.0/20',
5667 'FO': '88.85.32.0/19',
5669 'GA': '41.158.0.0/15',
5671 'GD': '74.122.88.0/21',
5672 'GE': '31.146.0.0/16',
5673 'GF': '161.22.64.0/18',
5674 'GG': '62.68.160.0/19',
5675 'GH': '154.160.0.0/12',
5676 'GI': '95.164.0.0/16',
5677 'GL': '88.83.0.0/19',
5678 'GM': '160.182.0.0/15',
5679 'GN': '197.149.192.0/18',
5680 'GP': '104.250.0.0/19',
5681 'GQ': '105.235.224.0/20',
5682 'GR': '94.64.0.0/13',
5683 'GT': '168.234.0.0/16',
5684 'GU': '168.123.0.0/16',
5685 'GW': '197.214.80.0/20',
5686 'GY': '181.41.64.0/18',
5687 'HK': '113.252.0.0/14',
5688 'HN': '181.210.0.0/16',
5689 'HR': '93.136.0.0/13',
5690 'HT': '148.102.128.0/17',
5691 'HU': '84.0.0.0/14',
5692 'ID': '39.192.0.0/10',
5693 'IE': '87.32.0.0/12',
5694 'IL': '79.176.0.0/13',
5695 'IM': '5.62.80.0/20',
5696 'IN': '117.192.0.0/10',
5697 'IO': '203.83.48.0/21',
5698 'IQ': '37.236.0.0/14',
5699 'IR': '2.176.0.0/12',
5700 'IS': '82.221.0.0/16',
5701 'IT': '79.0.0.0/10',
5702 'JE': '87.244.64.0/18',
5703 'JM': '72.27.0.0/17',
5704 'JO': '176.29.0.0/16',
5705 'JP': '133.0.0.0/8',
5706 'KE': '105.48.0.0/12',
5707 'KG': '158.181.128.0/17',
5708 'KH': '36.37.128.0/17',
5709 'KI': '103.25.140.0/22',
5710 'KM': '197.255.224.0/20',
5711 'KN': '198.167.192.0/19',
5712 'KP': '175.45.176.0/22',
5713 'KR': '175.192.0.0/10',
5714 'KW': '37.36.0.0/14',
5715 'KY': '64.96.0.0/15',
5716 'KZ': '2.72.0.0/13',
5717 'LA': '115.84.64.0/18',
5718 'LB': '178.135.0.0/16',
5719 'LC': '24.92.144.0/20',
5720 'LI': '82.117.0.0/19',
5721 'LK': '112.134.0.0/15',
5722 'LR': '102.183.0.0/16',
5723 'LS': '129.232.0.0/17',
5724 'LT': '78.56.0.0/13',
5725 'LU': '188.42.0.0/16',
5726 'LV': '46.109.0.0/16',
5727 'LY': '41.252.0.0/14',
5728 'MA': '105.128.0.0/11',
5729 'MC': '88.209.64.0/18',
5730 'MD': '37.246.0.0/16',
5731 'ME': '178.175.0.0/17',
5732 'MF': '74.112.232.0/21',
5733 'MG': '154.126.0.0/17',
5734 'MH': '117.103.88.0/21',
5735 'MK': '77.28.0.0/15',
5736 'ML': '154.118.128.0/18',
5737 'MM': '37.111.0.0/17',
5738 'MN': '49.0.128.0/17',
5739 'MO': '60.246.0.0/16',
5740 'MP': '202.88.64.0/20',
5741 'MQ': '109.203.224.0/19',
5742 'MR': '41.188.64.0/18',
5743 'MS': '208.90.112.0/22',
5744 'MT': '46.11.0.0/16',
5745 'MU': '105.16.0.0/12',
5746 'MV': '27.114.128.0/18',
5747 'MW': '102.70.0.0/15',
5748 'MX': '187.192.0.0/11',
5749 'MY': '175.136.0.0/13',
5750 'MZ': '197.218.0.0/15',
5751 'NA': '41.182.0.0/16',
5752 'NC': '101.101.0.0/18',
5753 'NE': '197.214.0.0/18',
5754 'NF': '203.17.240.0/22',
5755 'NG': '105.112.0.0/12',
5756 'NI': '186.76.0.0/15',
5757 'NL': '145.96.0.0/11',
5758 'NO': '84.208.0.0/13',
5759 'NP': '36.252.0.0/15',
5760 'NR': '203.98.224.0/19',
5761 'NU': '49.156.48.0/22',
5762 'NZ': '49.224.0.0/14',
5763 'OM': '5.36.0.0/15',
5764 'PA': '186.72.0.0/15',
5765 'PE': '186.160.0.0/14',
5766 'PF': '123.50.64.0/18',
5767 'PG': '124.240.192.0/19',
5768 'PH': '49.144.0.0/13',
5769 'PK': '39.32.0.0/11',
5770 'PL': '83.0.0.0/11',
5771 'PM': '70.36.0.0/20',
5772 'PR': '66.50.0.0/16',
5773 'PS': '188.161.0.0/16',
5774 'PT': '85.240.0.0/13',
5775 'PW': '202.124.224.0/20',
5776 'PY': '181.120.0.0/14',
5777 'QA': '37.210.0.0/15',
5778 'RE': '102.35.0.0/16',
5779 'RO': '79.112.0.0/13',
5780 'RS': '93.86.0.0/15',
5781 'RU': '5.136.0.0/13',
5782 'RW': '41.186.0.0/16',
5783 'SA': '188.48.0.0/13',
5784 'SB': '202.1.160.0/19',
5785 'SC': '154.192.0.0/11',
5786 'SD': '102.120.0.0/13',
5787 'SE': '78.64.0.0/12',
5788 'SG': '8.128.0.0/10',
5789 'SI': '188.196.0.0/14',
5790 'SK': '78.98.0.0/15',
5791 'SL': '102.143.0.0/17',
5792 'SM': '89.186.32.0/19',
5793 'SN': '41.82.0.0/15',
5794 'SO': '154.115.192.0/18',
5795 'SR': '186.179.128.0/17',
5796 'SS': '105.235.208.0/21',
5797 'ST': '197.159.160.0/19',
5798 'SV': '168.243.0.0/16',
5799 'SX': '190.102.0.0/20',
5801 'SZ': '41.84.224.0/19',
5802 'TC': '65.255.48.0/20',
5803 'TD': '154.68.128.0/19',
5804 'TG': '196.168.0.0/14',
5805 'TH': '171.96.0.0/13',
5806 'TJ': '85.9.128.0/18',
5807 'TK': '27.96.24.0/21',
5808 'TL': '180.189.160.0/20',
5809 'TM': '95.85.96.0/19',
5810 'TN': '197.0.0.0/11',
5811 'TO': '175.176.144.0/21',
5812 'TR': '78.160.0.0/11',
5813 'TT': '186.44.0.0/15',
5814 'TV': '202.2.96.0/19',
5815 'TW': '120.96.0.0/11',
5816 'TZ': '156.156.0.0/14',
5817 'UA': '37.52.0.0/14',
5818 'UG': '102.80.0.0/13',
5820 'UY': '167.56.0.0/13',
5821 'UZ': '84.54.64.0/18',
5822 'VA': '212.77.0.0/19',
5823 'VC': '207.191.240.0/21',
5824 'VE': '186.88.0.0/13',
5825 'VG': '66.81.192.0/20',
5826 'VI': '146.226.0.0/16',
5827 'VN': '14.160.0.0/11',
5828 'VU': '202.80.32.0/20',
5829 'WF': '117.20.32.0/21',
5830 'WS': '202.4.32.0/19',
5831 'YE': '134.35.0.0/16',
5832 'YT': '41.242.116.0/22',
5833 'ZA': '41.0.0.0/11',
5834 'ZM': '102.144.0.0/13',
5835 'ZW': '102.177.192.0/18',
5839 def random_ipv4(cls, code_or_block):
5840 if len(code_or_block) == 2:
5841 block = cls._country_ip_map.get(code_or_block.upper())
5845 block = code_or_block
5846 addr, preflen = block.split('/')
5847 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5848 addr_max = addr_min | (0xffffffff >> int(preflen))
5849 return compat_str(socket.inet_ntoa(
5850 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5853 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5854 def __init__(self, proxies=None):
5855 # Set default handlers
5856 for type in ('http', 'https'):
5857 setattr(self, '%s_open' % type,
5858 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5859 meth(r, proxy, type))
5860 compat_urllib_request.ProxyHandler.__init__(self, proxies)
5862 def proxy_open(self, req, proxy, type):
5863 req_proxy = req.headers.get('Ytdl-request-proxy')
5864 if req_proxy is not None:
5866 del req.headers['Ytdl-request-proxy']
5868 if proxy == '__noproxy__':
5869 return None # No Proxy
5870 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5871 req.add_header('Ytdl-socks-proxy', proxy)
5872 # yt-dlp's http/https handlers do wrapping the socket with socks
5874 return compat_urllib_request.ProxyHandler.proxy_open(
5875 self, req, proxy, type)
5878 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5879 # released into Public Domain
5880 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string (big-endian).

    If optional blocksize is given and greater than zero, pad the front of
    the byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # int.to_bytes replaces the original hand-rolled struct-pack loop plus
    # leading-zero stripping; n == 0 still encodes as a single zero byte.
    n = int(n)
    if n < 0:
        n = 0  # the original `while n > 0` loop treated negative n like 0
    length = max(1, (n.bit_length() + 7) // 8)
    if blocksize > 0 and length % blocksize:
        length += blocksize - length % blocksize
    return n.to_bytes(length, 'big')
5912 def bytes_to_long(s):
5913 """bytes_to_long(string) : long
5914 Convert a byte string to a long integer.
5916 This is (essentially) the inverse of long_to_bytes().
5921 extra = (4 - length % 4)
5922 s = b'\000' * extra + s
5923 length = length + extra
5924 for i in range(0, length, 4):
5925 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
def ohdave_rsa_encrypt(data, exponent, modulus):
    """Encrypt *data* with OHDave's RSA scheme (http://www.ohdave.com/rsa/).

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The payload is the byte string interpreted as a little-endian integer.
    reversed_hex = binascii.hexlify(data[::-1])
    message = int(reversed_hex, 16)
    return '%x' % pow(message, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data     input data
    @param {int} length     target length
    @returns {int[]}        padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS#1 v1.5, EME-PKCS1-v1_5) requires the padding string PS
    # to consist of NONZERO pseudo-random octets; the original
    # randint(0, 254) could emit zero bytes, prematurely terminating the
    # padding on decryption.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Encode a non-negative integer *num* in base *n*.

    Digits are taken from *table*; by default the 0-9a-zA-Z alphabet
    truncated to n symbols is used. Raises ValueError when the table has
    fewer than n symbols.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5979 def decode_packed_codes(code
):
5980 mobj
= re
.search(PACKED_CODES_RE
, code
)
5981 obfuscated_code
, base
, count
, symbols
= mobj
.groups()
5984 symbols
= symbols
.split('|')
5989 base_n_count
= encode_base_n(count
, base
)
5990 symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
5993 r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)],
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); characters outside the alphabet pass
    through unchanged."""
    if shift == 0:
        return s
    l = len(alphabet)
    return ''.join(
        alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
        for c in s)


def rot47(s):
    """Apply the ROT47 substitution cipher over the printable ASCII range."""
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=VAL,KEY="quoted,val"') into a dict.

    Double quotes around a value are stripped; quoted values may contain
    commas."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift: negative values are reinterpreted as
    their two's-complement 32-bit representation before shifting."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
6023 # Based on png2str() written by @gdkchan and improved by @yokrysty
6024 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
6025 def decode_png(png_data
):
6026 # Reference: https://www.w3.org/TR/PNG/
6027 header
= png_data
[8:]
6029 if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR':
6030 raise IOError('Not a valid PNG file.')
6032 int_map
= {1: '>B', 2: '>H', 4: '>I'}
6033 unpack_integer
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0]
6038 length
= unpack_integer(header
[:4])
6041 chunk_type
= header
[:4]
6044 chunk_data
= header
[:length
]
6045 header
= header
[length
:]
6047 header
= header
[4:] # Skip CRC
6055 ihdr
= chunks
[0]['data']
6057 width
= unpack_integer(ihdr
[:4])
6058 height
= unpack_integer(ihdr
[4:8])
6062 for chunk
in chunks
:
6063 if chunk
['type'] == b
'IDAT':
6064 idat
+= chunk
['data']
6067 raise IOError('Unable to read PNG data.')
6069 decompressed_data
= bytearray(zlib
.decompress(idat
))
6074 def _get_pixel(idx
):
6079 for y
in range(height
):
6080 basePos
= y
* (1 + stride
)
6081 filter_type
= decompressed_data
[basePos
]
6085 pixels
.append(current_row
)
6087 for x
in range(stride
):
6088 color
= decompressed_data
[1 + basePos
+ x
]
6089 basex
= y
* stride
+ x
6094 left
= _get_pixel(basex
- 3)
6096 up
= _get_pixel(basex
- stride
)
6098 if filter_type
== 1: # Sub
6099 color
= (color
+ left
) & 0xff
6100 elif filter_type
== 2: # Up
6101 color
= (color
+ up
) & 0xff
6102 elif filter_type
== 3: # Average
6103 color
= (color
+ ((left
+ up
) >> 1)) & 0xff
6104 elif filter_type
== 4: # Paeth
6110 c
= _get_pixel(basex
- stride
- 3)
6118 if pa
<= pb
and pa
<= pc
:
6119 color
= (color
+ a
) & 0xff
6121 color
= (color
+ b
) & 0xff
6123 color
= (color
+ c
) & 0xff
6125 current_row
.append(color
)
6127 return width
, height
, pixels
6130 def write_xattr(path
, key
, value
):
6131 # This mess below finds the best xattr tool for the job
6133 # try the pyxattr module...
6136 if hasattr(xattr
, 'set'): # pyxattr
6137 # Unicode arguments are not supported in python-pyxattr until
6139 # See https://github.com/ytdl-org/youtube-dl/issues/5498
6140 pyxattr_required_version
= '0.5.0'
6141 if version_tuple(xattr
.__version
__) < version_tuple(pyxattr_required_version
):
6142 # TODO: fallback to CLI tools
6143 raise XAttrUnavailableError(
6144 'python-pyxattr is detected but is too old. '
6145 'yt-dlp requires %s or above while your version is %s. '
6146 'Falling back to other xattr implementations' % (
6147 pyxattr_required_version
, xattr
.__version
__))
6149 setxattr
= xattr
.set
6151 setxattr
= xattr
.setxattr
6154 setxattr(path
, key
, value
)
6155 except EnvironmentError as e
:
6156 raise XAttrMetadataError(e
.errno
, e
.strerror
)
6159 if compat_os_name
== 'nt':
6160 # Write xattrs to NTFS Alternate Data Streams:
6161 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6162 assert ':' not in key
6163 assert os
.path
.exists(path
)
6165 ads_fn
= path
+ ':' + key
6167 with open(ads_fn
, 'wb') as f
:
6169 except EnvironmentError as e
:
6170 raise XAttrMetadataError(e
.errno
, e
.strerror
)
6172 user_has_setfattr
= check_executable('setfattr', ['--version'])
6173 user_has_xattr
= check_executable('xattr', ['-h'])
6175 if user_has_setfattr
or user_has_xattr
:
6177 value
= value
.decode('utf-8')
6178 if user_has_setfattr
:
6179 executable
= 'setfattr'
6180 opts
= ['-n', key
, '-v', value
]
6181 elif user_has_xattr
:
6182 executable
= 'xattr'
6183 opts
= ['-w', key
, value
]
6185 cmd
= ([encodeFilename(executable
, True)]
6186 + [encodeArgument(o
) for o
in opts
]
6187 + [encodeFilename(path
, True)])
6191 cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
6192 except EnvironmentError as e
:
6193 raise XAttrMetadataError(e
.errno
, e
.strerror
)
6194 stdout
, stderr
= p
.communicate_or_kill()
6195 stderr
= stderr
.decode('utf-8', 'replace')
6196 if p
.returncode
!= 0:
6197 raise XAttrMetadataError(p
.returncode
, stderr
)
6200 # On Unix, and can't find pyxattr, setfattr, or xattr.
6201 if sys
.platform
.startswith('linux'):
6202 raise XAttrUnavailableError(
6203 "Couldn't find a tool to set the xattrs. "
6204 "Install either the python 'pyxattr' or 'xattr' "
6205 "modules, or the GNU 'attr' package "
6206 "(which contains the 'setfattr' tool).")
6208 raise XAttrUnavailableError(
6209 "Couldn't find a tool to set the xattrs. "
6210 "Install either the python 'xattr' module, "
6211 "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Build a dict mapping the given field names to the string year, month
    and day of a uniformly random date between 1950-01-01 and 1995-12-31."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6226 # Templates for internet shortcut files, which are plain text files.
6227 DOT_URL_LINK_TEMPLATE
= '''
6232 DOT_WEBLOC_LINK_TEMPLATE
= '''
6233 <?xml version="1.0" encoding="UTF-8"?>
6234 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6235 <plist version="1.0">
6238 \t<string>%(url)s</string>
6243 DOT_DESKTOP_LINK_TEMPLATE
= '''
6253 'url': DOT_URL_LINK_TEMPLATE
,
6254 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
6255 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
6259 def iri_to_uri(iri
):
6261 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6263 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6266 iri_parts
= compat_urllib_parse_urlparse(iri
)
6268 if '[' in iri_parts
.netloc
:
6269 raise ValueError('IPv6 URIs are not, yet, supported.')
6270 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6272 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6275 if iri_parts
.username
:
6276 net_location
+= compat_urllib_parse_quote(iri_parts
.username
, safe
=r
"!$%&'()*+,~")
6277 if iri_parts
.password
is not None:
6278 net_location
+= ':' + compat_urllib_parse_quote(iri_parts
.password
, safe
=r
"!$%&'()*+,~")
6281 net_location
+= iri_parts
.hostname
.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6282 # The 'idna' encoding produces ASCII text.
6283 if iri_parts
.port
is not None and iri_parts
.port
!= 80:
6284 net_location
+= ':' + str(iri_parts
.port
)
6286 return compat_urllib_parse_urlunparse(
6290 compat_urllib_parse_quote_plus(iri_parts
.path
, safe
=r
"!$%&'()*+,/:;=@|~"),
6292 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6293 compat_urllib_parse_quote_plus(iri_parts
.params
, safe
=r
"!$%&'()*+,/:;=@|~"),
6295 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6296 compat_urllib_parse_quote_plus(iri_parts
.query
, safe
=r
"!$%&'()*+,/:;=?@{|}~"),
6298 compat_urllib_parse_quote_plus(iri_parts
.fragment
, safe
=r
"!#$%&'()*+,/:;=?@{|}~")))
6300 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """On Windows, return the absolute path prefixed with '\\\\?\\' to lift
    the MAX_PATH limitation; on other platforms return the path unchanged.
    (Individual path-segment length limits may still apply.)"""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # The '\\?\' prefix disables Win32 path-length normalisation.
    return '\\\\?\\' + os.path.abspath(path)
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Render a value through *template*.

    When *field* is None, *obj* itself is the value (falling back to
    *default* when obj is None); otherwise obj.get(field, default) is used.
    *func* is applied to the value first (unless the value is in *ignore*).
    Values in *ignore* yield *default* instead of being formatted."""
    if field is None:
        val = obj if obj is not None else default
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    return template % val if val not in ignore else default
6321 def clean_podcast_url(url
):
6322 return re
.sub(r
'''(?x)
6326 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6329 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6332 cn\.co| # https://podcorn.com/analytics-prefix/
6333 st\.fm # https://podsights.com/docs/
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random UUIDv4-shaped string (hex digits with a literal '4'
    as the version nibble)."""
    template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'
    return ''.join(
        _HEX_TABLE[random.randint(0, 15)] if ch in 'xy' else ch
        for ch in template)
def make_dir(path, to_screen=None):
    """Create the parent directory of *path* if it does not already exist.

    Returns True on success (including when nothing needed creating) and
    False on failure. On failure the error is reported through *to_screen*
    when it is callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: the original condition was `callable(to_screen) is not
        # None`, which is always True (callable() returns a bool), so a
        # None to_screen would be called and raise TypeError.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
def get_executable_path():
    """Return the absolute base directory the program runs from, whether it
    is a PyInstaller bundle, a zipimported archive, or a plain checkout."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6368 def load_plugins(name
, suffix
, namespace
):
6371 plugins_spec
= importlib
.util
.spec_from_file_location(
6372 name
, os
.path
.join(get_executable_path(), 'ytdlp_plugins', name
, '__init__.py'))
6373 plugins
= importlib
.util
.module_from_spec(plugins_spec
)
6374 sys
.modules
[plugins_spec
.name
] = plugins
6375 plugins_spec
.loader
.exec_module(plugins
)
6376 for name
in dir(plugins
):
6377 if name
in namespace
:
6379 if not name
.endswith(suffix
):
6381 klass
= getattr(plugins
, name
)
6382 classes
[name
] = namespace
[name
] = klass
6383 except FileNotFoundError
:
6389 obj
, *path_list
, default
=None, expected_type
=None, get_all
=True,
6390 casesense
=True, is_user_input
=False, traverse_string
=False):
6391 ''' Traverse nested list/dict/tuple
6392 @param path_list A list of paths which are checked one by one.
6393 Each path is a list of keys where each key is a string,
6394 a function, a tuple of strings or "...".
6395 When a fuction is given, it takes the key as argument and
6396 returns whether the key matches or not. When a tuple is given,
6397 all the keys given in the tuple are traversed, and
6398 "..." traverses all the keys in the object
6399 @param default Default value to return
6400 @param expected_type Only accept final value of this type (Can also be any callable)
6401 @param get_all Return all the values obtained from a path or only the first one
6402 @param casesense Whether to consider dictionary keys as case sensitive
6403 @param is_user_input Whether the keys are generated from user input. If True,
6404 strings are converted to int/slice if necessary
6405 @param traverse_string Whether to traverse inside strings. If True, any
6406 non-compatible object will also be converted into a string
6410 _lower
= lambda k
: (k
.lower() if isinstance(k
, str) else k
)
6411 path_list
= (map(_lower
, variadic(path
)) for path
in path_list
)
6413 def _traverse_obj(obj
, path
, _current_depth
=0):
6417 path
= tuple(variadic(path
))
6418 for i
, key
in enumerate(path
):
6419 if isinstance(key
, (list, tuple)):
6420 obj
= [_traverse_obj(obj
, sub_key
, _current_depth
) for sub_key
in key
]
6423 obj
= (obj
.values() if isinstance(obj
, dict)
6424 else obj
if isinstance(obj
, (list, tuple, LazyList
))
6425 else str(obj
) if traverse_string
else [])
6427 depth
= max(depth
, _current_depth
)
6428 return [_traverse_obj(inner_obj
, path
[i
+ 1:], _current_depth
) for inner_obj
in obj
]
6430 if isinstance(obj
, (list, tuple, LazyList
)):
6431 obj
= enumerate(obj
)
6432 elif isinstance(obj
, dict):
6435 if not traverse_string
:
6439 depth
= max(depth
, _current_depth
)
6440 return [_traverse_obj(v
, path
[i
+ 1:], _current_depth
) for k
, v
in obj
if key(k
)]
6441 elif isinstance(obj
, dict) and not (is_user_input
and key
== ':'):
6442 obj
= (obj
.get(key
) if casesense
or (key
in obj
)
6443 else next((v
for k
, v
in obj
.items() if _lower(k
) == key
), None))
6446 key
= (int_or_none(key
) if ':' not in key
6447 else slice(*map(int_or_none
, key
.split(':'))))
6448 if key
== slice(None):
6449 return _traverse_obj(obj
, (..., *path
[i
+ 1:]), _current_depth
)
6450 if not isinstance(key
, (int, slice)):
6452 if not isinstance(obj
, (list, tuple, LazyList
)):
6453 if not traverse_string
:
6462 if isinstance(expected_type
, type):
6463 type_test
= lambda val
: val
if isinstance(val
, expected_type
) else None
6464 elif expected_type
is not None:
6465 type_test
= expected_type
6467 type_test
= lambda val
: val
6469 for path
in path_list
:
6471 val
= _traverse_obj(obj
, path
)
6474 for _
in range(depth
- 1):
6475 val
= itertools
.chain
.from_iterable(v
for v
in val
if v
is not None)
6476 val
= [v
for v
in map(type_test
, val
) if v
is not None]
6478 return val
if get_all
else val
[0]
6480 val
= type_test(val
)
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated wrapper around traverse_obj(); kept only for backward
    compatibility. Do not use in new code."""
    return traverse_obj(
        dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* unchanged when it is an iterable (excluding the atomic
    *allowed_types*); otherwise wrap it in a one-element tuple."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6496 # create a JSON Web Signature (jws) with HS256 algorithm
6497 # the resulting format is in JWS Compact Serialization
6498 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6499 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create a JSON Web Signature in JWS Compact Serialization, signed with
    HMAC-SHA256 (HS256).

    payload_data: JSON-serializable claims object
    key:          shared secret (str)
    headers:      optional extra entries merged into the JOSE header
    Returns the token as bytes: b'<header>.<payload>.<signature>'.
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    # `headers=None` replaces the original mutable default argument
    # (`headers={}`); behavior for callers is unchanged.
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
def jwt_decode_hs256(jwt):
    """Decode the payload of an HS256 JWT without verifying the signature.

    Bug fix: RFC 7515 requires the base64url padding of each segment to be
    stripped, so `urlsafe_b64decode` used to fail on spec-compliant tokens.
    Re-append '='s before decoding; superfluous '='s are ignored by the
    (non-strict) decoder.
    """
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    payload_data = json.loads(base64.urlsafe_b64decode(payload_b64 + '==='))
    return payload_data
6522 def supports_terminal_sequences(stream
):
6523 if compat_os_name
== 'nt':
6524 if get_windows_version() < (10, 0, 10586):
6526 elif not os
.getenv('TERM'):
6529 return stream
.isatty()
6530 except BaseException
:
6534 _terminal_sequences_re
= re
.compile('\033\\[[^m]+m')
6537 def remove_terminal_sequences(string
):
6538 return _terminal_sequences_re
.sub('', string
)
def number_of_digits(number):
    """Number of characters in the base-10 rendering of *number*
    (a leading '-' sign counts as a character)."""
    as_decimal = '%d' % number
    return len(as_decimal)