4 from __future__
import unicode_literals
39 import xml
.etree
.ElementTree
44 compat_HTMLParseError
,
50 compat_ctypes_WINFUNCTYPE
,
51 compat_etree_fromstring
,
54 compat_html_entities_html5
,
67 compat_urllib_parse_urlencode
,
68 compat_urllib_parse_urlparse
,
69 compat_urllib_parse_urlunparse
,
70 compat_urllib_parse_quote
,
71 compat_urllib_parse_quote_plus
,
72 compat_urllib_parse_unquote_plus
,
73 compat_urllib_request
,
def register_socks_protocols():
    """Make urlparse treat SOCKS schemes as carrying a netloc.

    In Python < 2.6.5, urlsplit() suffers from
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended to that registry exactly once.
    """
    registry = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in registry:
            registry.append(scheme)
# The type of a compiled regular expression; it is not clearly
# defined/exposed otherwise (presumably used for isinstance() checks
# against user-supplied patterns — confirm at call sites).
compiled_regex_type = type(re.compile(''))
97 def random_user_agent():
98 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1677 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1681 'User-Agent': random_user_agent(),
1682 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1683 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1684 'Accept-Encoding': 'gzip, deflate',
1685 'Accept-Language': 'en-us,en;q=0.5',
1690 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Sentinel distinguishing "no default supplied" from an explicit None
NO_DEFAULT = object()
# Calendar month names in English, in order (January == index 0)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1701 'en': ENGLISH_MONTH_NAMES
,
1703 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1704 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1707 KNOWN_EXTENSIONS
= (
1708 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1709 'flv', 'f4v', 'f4a', 'f4b',
1710 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1711 'mkv', 'mka', 'mk3d',
1714 'asf', 'wmv', 'wma',
1720 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
# Each accented character in the first string maps positionally onto the
# corresponding ASCII replacement produced by the itertools.chain() below
# (single chars come from the plain strings, multi-char replacements such
# as 'AE'/'ss' from the interleaved lists).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1749 '%Y/%m/%d %H:%M:%S',
1753 '%Y-%m-%d %H:%M:%S',
1754 '%Y-%m-%d %H:%M:%S.%f',
1755 '%Y-%m-%d %H:%M:%S:%f',
1758 '%Y-%m-%dT%H:%M:%SZ',
1759 '%Y-%m-%dT%H:%M:%S.%fZ',
1760 '%Y-%m-%dT%H:%M:%S.%f0Z',
1761 '%Y-%m-%dT%H:%M:%S',
1762 '%Y-%m-%dT%H:%M:%S.%f',
1764 '%b %d %Y at %H:%M',
1765 '%b %d %Y at %H:%M:%S',
1766 '%B %d %Y at %H:%M',
1767 '%B %d %Y at %H:%M:%S',
1771 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1772 DATE_FORMATS_DAY_FIRST
.extend([
1778 '%d/%m/%Y %H:%M:%S',
1781 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1782 DATE_FORMATS_MONTH_FIRST
.extend([
1787 '%m/%d/%Y %H:%M:%S',
# Matches P.A.C.K.E.R.-style packed JavaScript: captures the payload,
# radix, symbol count, and the '|'-separated symbol table.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks; the \1 backreference
# forces the closing quote (if any) to match the opening one, and the
# payload is captured in the named group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1794 def preferredencoding():
1795 """Get preferred encoding.
1797 Returns the best encoding scheme for the system, based on
1798 locale.getpreferredencoding() and some further tweaks.
1801 pref = locale.getpreferredencoding()
1809 def write_json_file(obj, fn):
1810 """ Encode obj as JSON and write it to fn, atomically if possible """
1812 fn = encodeFilename(fn)
1813 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1814 encoding = get_filesystem_encoding()
1815 # os.path.basename returns a bytes object, but NamedTemporaryFile
1816 # will fail if the filename contains non ascii characters unless we
1817 # use a unicode object
1818 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1819 # the same for os.path.dirname
1820 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1822 path_basename = os.path.basename
1823 path_dirname = os.path.dirname
1827 'prefix
': path_basename(fn) + '.',
1828 'dir': path_dirname(fn),
1832 # In Python 2.x, json.dump expects a bytestream.
1833 # In Python 3.x, it writes to a character stream
1834 if sys.version_info < (3, 0):
1839 'encoding
': 'utf
-8',
1842 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1847 if sys.platform == 'win32
':
1848 # Need to remove existing file on Windows, else os.rename raises
1849 # WindowsError or FileExistsError.
1857 os.chmod(tf.name, 0o666 & ~mask)
1860 os.rename(tf.name, fn)
1869 if sys.version_info >= (2, 7):
1870 def find_xpath_attr(node, xpath, key, val=None):
1871 """ Find the xpath xpath[@key=val] """
1872 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1873 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1874 return node.find(expr)
1876 def find_xpath_attr(node, xpath, key, val=None):
1877 for f in node.findall(compat_xpath(xpath)):
1878 if key not in f.attrib:
1880 if val is None or f.attrib.get(key) == val:
1884 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1885 # the namespace parameter
1888 def xpath_with_ns(path
, ns_map
):
1889 components
= [c
.split(':') for c
in path
.split('/')]
1891 for c
in components
:
1893 replaced
.append(c
[0])
1896 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1897 return '/'.join(replaced
)
1900 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1901 def _find_xpath(xpath
):
1902 return node
.find(compat_xpath(xpath
))
1904 if isinstance(xpath
, (str, compat_str
)):
1905 n
= _find_xpath(xpath
)
1913 if default
is not NO_DEFAULT
:
1916 name
= xpath
if name
is None else name
1917 raise ExtractorError('Could not find XML element %s' % name
)
1923 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1924 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1925 if n
is None or n
== default
:
1928 if default
is not NO_DEFAULT
:
1931 name
= xpath
if name
is None else name
1932 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1938 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1939 n
= find_xpath_attr(node
, xpath
, key
)
1941 if default
is not NO_DEFAULT
:
1944 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1945 raise ExtractorError('Could not find XML attribute %s' % name
)
1948 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given ID attribute in the
    passed HTML document, or None when no such tag exists."""
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the
    passed HTML document, or None when nothing matches."""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value in
    the passed HTML document, or None when nothing matches."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed
    HTML document as a list."""
    # Match the class name as a whole word anywhere inside the (quoted)
    # class attribute value; the pattern itself is a regex, so escaping of
    # the value is disabled downstream.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1974 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1975 """Return the content of the tag with the specified attribute in the passed HTML document"""
1977 value = re.escape(value) if escape_value else value
1980 for m in re.finditer(r'''(?xs)
1982 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1984 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1988 ''' % (re.escape(attribute), value), html):
1989 res = m.group('content
')
1991 if res.startswith('"') or res.startswith("'"):
1994 retlist.append(unescapeHTML(res))
1999 class HTMLAttributeParser(compat_HTMLParser):
2000 """Trivial HTML parser to gather the attributes for a single element"""
2004 compat_HTMLParser.__init__(self)
2006 def handle_starttag(self, tag, attrs):
2007 self.attrs = dict(attrs)
2010 class HTMLListAttrsParser(compat_HTMLParser):
2011 """HTML parser to gather the attributes for the elements of a list"""
2014 compat_HTMLParser.__init__(self)
2018 def handle_starttag(self, tag, attrs):
2019 if tag == 'li
' and self._level == 0:
2020 self.items.append(dict(attrs))
2023 def handle_endtag(self, tag):
2027 def extract_attributes(html_element):
2028 """Given a string for an HTML element such as
2030 a="foo" B="bar" c="&98;az" d=boz
2031 empty= noval entity="&"
2034 Decode and return a dictionary of attributes.
2036 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2037 'empty
': '', 'noval
': None, 'entity
': '&',
2038 'sq
': '"', 'dq': '\''
2040 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2041 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2043 parser = HTMLAttributeParser()
2045 parser.feed(html_element)
2047 # Older Python may throw HTMLParseError in case of malformed HTML
2048 except compat_HTMLParseError:
2053 def parse_list(webpage):
2054 """Given a string for an series of HTML <li> elements,
2055 return a dictionary of their attributes"""
2056 parser = HTMLListAttrsParser()
2057 parser.feed(webpage)
2062 def clean_html(html):
2063 """Clean an HTML snippet into a readable string"""
2065 if html is None: # Convenience for sanitizing descriptions etc.
2069 html = html.replace('\n', ' ')
2070 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2071 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2073 html = re.sub('<.*?>', '', html)
2074 # Replace html entities
2075 html = unescapeHTML(html)
2079 def sanitize_open(filename, open_mode):
2080 """Try to open the given filename, and slightly tweak it if this fails.
2082 Attempts to open the given filename. If this fails, it tries to change
2083 the filename slightly, step by step, until it's either able to open it
2084 or it fails and raises a final exception, like the standard open()
2087 It returns the tuple (stream, definitive_file_name).
2091 if sys.platform == 'win32':
2093 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2094 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2095 stream = open(encodeFilename(filename), open_mode)
2096 return (stream, filename)
2097 except (IOError, OSError) as err:
2098 if err.errno in (errno.EACCES,):
2101 # In case of error, try to remove win32 forbidden chars
2102 alt_filename = sanitize_path(filename)
2103 if alt_filename == filename:
2106 # An exception here should be caught in the caller
2107 stream = open(encodeFilename(alt_filename), open_mode)
2108 return (stream, alt_filename)
2111 def timeconvert(timestr):
2112 """Convert RFC 2822 defined time string into system timestamp"""
2114 timetuple = email.utils.parsedate_tz(timestr)
2115 if timetuple is not None:
2116 timestamp = email.utils.mktime_tz(timetuple)
2120 def sanitize_filename(s, restricted=False, is_id=False):
2121 """Sanitizes a string so it could be used as part of a filename.
2122 If restricted is set, use a stricter subset of allowed characters.
2123 Set is_id if this is not an arbitrary string, but an ID that should be kept
2126 def replace_insane(char):
2127 if restricted and char in ACCENT_CHARS:
2128 return ACCENT_CHARS[char]
2129 elif not restricted and char == '\n':
2131 elif char == '?' or ord(char) < 32 or ord(char) == 127:
2134 return '' if restricted else '\''
2136 return '_
-' if restricted else ' -'
2137 elif char in '\\/|
*<>':
2139 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2141 if restricted
and ord(char
) > 127:
2148 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2149 result
= ''.join(map(replace_insane
, s
))
2151 while '__' in result
:
2152 result
= result
.replace('__', '_')
2153 result
= result
.strip('_')
2154 # Common case of "Foreign band name - English song title"
2155 if restricted
and result
.startswith('-_'):
2157 if result
.startswith('-'):
2158 result
= '_' + result
[len('-'):]
2159 result
= result
.lstrip('.')
2165 def sanitize_path(s
, force
=False):
2166 """Sanitizes and normalizes path on Windows"""
2167 if sys
.platform
== 'win32':
2169 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2170 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2171 drive_or_unc
, _
= os
.path
.splitunc(s
)
2177 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2181 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2182 for path_part
in norm_path
]
2184 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2185 elif force
and s
[0] == os
.path
.sep
:
2186 sanitized_path
.insert(0, os
.path
.sep
)
2187 return os
.path
.join(*sanitized_path
)
2190 def sanitize_url(url
):
2191 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2192 # the number of unwanted failures due to missing protocol
2193 if url
.startswith('//'):
2194 return 'http:%s' % url
2195 # Fix some common typos seen so far
2197 # https://github.com/ytdl-org/youtube-dl/issues/15649
2198 (r
'^httpss://', r
'https://'),
2199 # https://bx1.be/lives/direct-tv/
2200 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2202 for mistake
, fixup
in COMMON_TYPOS
:
2203 if re
.match(mistake
, url
):
2204 return re
.sub(mistake
, fixup
, url
)
2208 def extract_basic_auth(url
):
2209 parts
= compat_urlparse
.urlsplit(url
)
2210 if parts
.username
is None:
2212 url
= compat_urlparse
.urlunsplit(parts
._replace
(netloc
=(
2213 parts
.hostname
if parts
.port
is None
2214 else '%s:%d' % (parts
.hostname
, parts
.port
))))
2215 auth_payload
= base64
.b64encode(
2216 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode('utf-8'))
2217 return url
, 'Basic ' + auth_payload
.decode('utf-8')
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized/escaped URL, moving any
    inline user:password credentials into a Basic Authorization header."""
    clean_url = escape_url(sanitize_url(url))
    clean_url, auth_header = extract_basic_auth(clean_url)
    if auth_header is not None:
        # Prefer a positionally-passed headers dict; otherwise make sure
        # kwargs carries one we can add the Authorization header to.
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2229 """Expand shell variables and ~"""
2230 return os
.path
.expandvars(compat_expanduser(s
))
2233 def orderedSet(iterable
):
2234 """ Remove all duplicates from the input iterable """
2242 def _htmlentity_transform(entity_with_semicolon
):
2243 """Transforms an HTML entity to a character."""
2244 entity
= entity_with_semicolon
[:-1]
2246 # Known non-numeric HTML entity
2247 if entity
in compat_html_entities
.name2codepoint
:
2248 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2250 # TODO: HTML5 allows entities without a semicolon. For example,
2251 # 'Éric' should be decoded as 'Éric'.
2252 if entity_with_semicolon
in compat_html_entities_html5
:
2253 return compat_html_entities_html5
[entity_with_semicolon
]
2255 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2256 if mobj
is not None:
2257 numstr
= mobj
.group(1)
2258 if numstr
.startswith('x'):
2260 numstr
= '0%s' % numstr
2263 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2265 return compat_chr(int(numstr
, base
))
2269 # Unknown entity in name, return its literal representation
2270 return '&%s;' % entity
2273 def unescapeHTML(s
):
2276 assert type(s
) == compat_str
2279 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2282 def escapeHTML(text
):
2285 .replace('&', '&')
2286 .replace('<', '<')
2287 .replace('>', '>')
2288 .replace('"', '"')
2289 .replace("'", ''')
2293 def process_communicate_or_kill(p
, *args
, **kwargs
):
2295 return p
.communicate(*args
, **kwargs
)
2296 except BaseException
: # Including KeyboardInterrupt
2302 class Popen(subprocess
.Popen
):
2303 if sys
.platform
== 'win32':
2304 _startupinfo
= subprocess
.STARTUPINFO()
2305 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
2309 def __init__(self
, *args
, **kwargs
):
2310 super(Popen
, self
).__init
__(*args
, **kwargs
, startupinfo
=self
._startupinfo
)
2312 def communicate_or_kill(self
, *args
, **kwargs
):
2313 return process_communicate_or_kill(self
, *args
, **kwargs
)
2316 def get_subprocess_encoding():
2317 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2318 # For subprocess calls, encode with locale encoding
2319 # Refer to http://stackoverflow.com/a/9951851/35070
2320 encoding
= preferredencoding()
2322 encoding
= sys
.getfilesystemencoding()
2323 if encoding
is None:
2328 def encodeFilename(s
, for_subprocess
=False):
2330 @param s The name of the file
2333 assert type(s
) == compat_str
2335 # Python 3 has a Unicode API
2336 if sys
.version_info
>= (3, 0):
2339 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2340 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2341 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2342 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2345 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2346 if sys
.platform
.startswith('java'):
2349 return s
.encode(get_subprocess_encoding(), 'ignore')
2352 def decodeFilename(b
, for_subprocess
=False):
2354 if sys
.version_info
>= (3, 0):
2357 if not isinstance(b
, bytes):
2360 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument (via encodeFilename) for subprocess use."""
    if not isinstance(s, compat_str):
        # Legacy callers may still hand in byte strings; normalize first.
        # Once all post processors are fixed this should become:
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
def decodeArgument(b):
    # Inverse of encodeArgument: decode using the subprocess encoding.
    return decodeFilename(b, True)
2376 def decodeOption(optval
):
2379 if isinstance(optval
, bytes):
2380 optval
= optval
.decode(preferredencoding())
2382 assert isinstance(optval
, compat_str
)
# Lightweight container for a duration split into clock components
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into an (hours, minutes, seconds,
    milliseconds) Time namedtuple."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2396 def formatSeconds(secs
, delim
=':', msec
=False):
2397 time
= timetuple_from_msec(secs
* 1000)
2399 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
2401 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
2403 ret
= '%d' % time
.seconds
2404 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
2407 def _ssl_load_windows_store_certs(ssl_context
, storename
):
2408 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2410 certs
= [cert
for cert
, encoding
, trust
in ssl
.enum_certificates(storename
)
2411 if encoding
== 'x509_asn' and (
2412 trust
is True or ssl
.Purpose
.SERVER_AUTH
.oid
in trust
)]
2413 except PermissionError
:
2417 ssl_context
.load_verify_locations(cadata
=cert
)
2418 except ssl
.SSLError
:
2422 def make_HTTPS_handler(params
, **kwargs
):
2423 opts_check_certificate
= not params
.get('nocheckcertificate')
2424 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLS_CLIENT
)
2425 context
.check_hostname
= opts_check_certificate
2426 context
.verify_mode
= ssl
.CERT_REQUIRED
if opts_check_certificate
else ssl
.CERT_NONE
2427 if opts_check_certificate
:
2429 context
.load_default_certs()
2430 # Work around the issue in load_default_certs when there are bad certificates. See:
2431 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2432 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2433 except ssl
.SSLError
:
2434 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2435 if sys
.platform
== 'win32' and hasattr(ssl
, 'enum_certificates'):
2436 # Create a new context to discard any certificates that were already loaded
2437 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLS_CLIENT
)
2438 context
.check_hostname
, context
.verify_mode
= True, ssl
.CERT_REQUIRED
2439 for storename
in ('CA', 'ROOT'):
2440 _ssl_load_windows_store_certs(context
, storename
)
2441 context
.set_default_verify_paths()
2442 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2445 def bug_reports_message(before
=';'):
2446 if ytdl_is_updateable():
2447 update_cmd
= 'type yt-dlp -U to update'
2449 update_cmd
= 'see https://github.com/yt-dlp/yt-dlp on how to update'
2450 msg
= 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
2451 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2452 msg
+= ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
2454 before
= before
.rstrip()
2455 if not before
or before
.endswith(('.', '!', '?')):
2456 msg
= msg
[0].title() + msg
[1:]
2458 return (before
+ ' ' if before
else '') + msg
2461 class YoutubeDLError(Exception):
2462 """Base exception for YoutubeDL errors."""
2465 def __init__(self
, msg
=None):
2468 elif self
.msg
is None:
2469 self
.msg
= type(self
).__name
__
2470 super().__init
__(self
.msg
)
# Exception classes raised on network-level failures; ExtractorError
# checks sys.exc_info() against this tuple.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # ssl.CertificateError is not present on every supported Python build
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2479 class ExtractorError(YoutubeDLError
):
2480 """Error during info extraction."""
2482 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
2483 """ tb, if given, is the original traceback (so that it can be printed out).
2484 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2486 if sys
.exc_info()[0] in network_exceptions
:
2491 self
.expected
= expected
2493 self
.video_id
= video_id
2495 self
.exc_info
= sys
.exc_info() # preserve original exception
2497 super(ExtractorError
, self
).__init
__(''.join((
2498 format_field(ie
, template
='[%s] '),
2499 format_field(video_id
, template
='%s: '),
2501 format_field(cause
, template
=' (caused by %r)'),
2502 '' if expected
else bug_reports_message())))
2504 def format_traceback(self
):
2505 if self
.traceback
is None:
2507 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised for URLs that no available extractor knows how to handle."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        # An unsupported URL is an expected condition, not a bug
        super(UnsupportedError, self).__init__(message, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction error.

    Raised when a video is not available from the caller's geographic
    location because of restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # A geo block is an expected condition, never a yt-dlp bug
        kwargs['expected'] = True
        super(GeoRestrictedError, self).__init__(msg, **kwargs)
        # countries: optional country codes associated with the restriction
        self.countries = countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    Thrown by FileDownloader objects when they are not configured to
    continue on errors; carries the corresponding error message.
    """

    def __init__(self, msg, exc_info=None):
        """exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info())."""
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Default message used when no explicit message is supplied
    msg = 'Entry not found in info'
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Fix: interpolate the offending filename (the parameter was
            # checked but never used, so the message always showed a
            # meaningless placeholder instead of identifying the file)
            self.msg += f': {filename}'
        super().__init__(self.msg)
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Default message; subclasses override it with the specific reason
    msg = 'The download was cancelled'
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ReExtractInfo(YoutubeDLError):
    """Signals that the video info needs to be extracted again."""

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        # expected: whether this re-extraction is a normal condition
        # rather than an error worth reporting
        self.expected = expected
2608 class ThrottledDownload(ReExtractInfo
):
2609 """ Download speed below --throttled-rate. """
2610 msg
= 'The download speed is below throttle limit'
2613 super().__init
__(self
.msg
, expected
=False)
2616 class UnavailableVideoError(YoutubeDLError
):
2617 """Unavailable Format exception.
2619 This exception will be thrown when a video is requested
2620 in a format that is not available for that video.
2622 msg
= 'Unable to download video'
2624 def __init__(self
, err
=None):
2626 self
.msg
+= f
': {err}'
2627 super().__init
__(self
.msg
)
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    May be raised by FileDownloader objects when a downloaded file is
    smaller than what the server announced, indicating the connection
    was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Keep both byte counts so callers can decide whether to resume
        self.downloaded = downloaded
        self.expected = expected
2647 class XAttrMetadataError(YoutubeDLError
):
2648 def __init__(self
, code
=None, msg
='Unknown error'):
2649 super(XAttrMetadataError
, self
).__init
__(msg
)
2653 # Parsing code and msg
2654 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2655 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2656 self
.reason
= 'NO_SPACE'
2657 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2658 self
.reason
= 'VALUE_TOO_LONG'
2660 self
.reason
= 'NOT_SUPPORTED'
2663 class XAttrUnavailableError(YoutubeDLError
):
2667 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2668 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2669 # expected HTTP responses to meet HTTP/1.0 or later (see also
2670 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2671 if sys
.version_info
< (3, 0):
2672 kwargs
['strict'] = True
2673 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2674 source_address
= ydl_handler
._params
.get('source_address')
2676 if source_address
is not None:
2677 # This is to workaround _create_connection() from socket where it will try all
2678 # address data from getaddrinfo() including IPv6. This filters the result from
2679 # getaddrinfo() based on the source_address value.
2680 # This is based on the cpython socket.create_connection() function.
2681 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2682 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2683 host
, port
= address
2685 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2686 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2687 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2688 if addrs
and not ip_addrs
:
2689 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2691 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2692 % (ip_version
, source_address
[0]))
2693 for res
in ip_addrs
:
2694 af
, socktype
, proto
, canonname
, sa
= res
2697 sock
= socket
.socket(af
, socktype
, proto
)
2698 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2699 sock
.settimeout(timeout
)
2700 sock
.bind(source_address
)
2702 err
= None # Explicitly break reference cycle
2704 except socket
.error
as _
:
2706 if sock
is not None:
2711 raise socket
.error('getaddrinfo returns an empty list')
2712 if hasattr(hc
, '_create_connection'):
2713 hc
._create
_connection
= _create_connection
2714 sa
= (source_address
, 0)
2715 if hasattr(hc
, 'source_address'): # Python 2.7+
2716 hc
.source_address
= sa
2718 def _hc_connect(self
, *args
, **kwargs
):
2719 sock
= _create_connection(
2720 (self
.host
, self
.port
), self
.timeout
, sa
)
2722 self
.sock
= ssl
.wrap_socket(
2723 sock
, self
.key_file
, self
.cert_file
,
2724 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2727 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' pseudo-header.

    When the marker is present, return a new dict without any
    'Accept-Encoding' header (matched case-insensitively) and without the
    marker itself, so the server sends an uncompressed response.  When the
    marker is absent, the original mapping is returned unchanged.
    """
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        # Use a dict comprehension instead of dict(generator) — same
        # behavior, clearer and idiomatic
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers
2742 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2743 """Handler for HTTP requests and responses.
2745 This class, when installed with an OpenerDirector, automatically adds
2746 the standard headers to every HTTP request and handles gzipped and
2747 deflated responses from web servers. If compression is to be avoided in
2748 a particular request, the original request in the program code only has
2749 to include the HTTP header "Youtubedl-no-compression", which will be
2750 removed before making the real request.
2752 Part of this code was copied from:
2754 http://techknack.net/python-urllib2-handlers/
2756 Andrew Rowls, the author of that code, agreed to release it to the
    def __init__(self, params, *args, **kwargs):
        # params is the yt-dlp options dict; it is stashed on the handler
        # (read later via self._params, e.g. for 'source_address').
        # Everything else is passed to HTTPHandler unchanged.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2764 def http_open(self
, req
):
2765 conn_class
= compat_http_client
.HTTPConnection
2767 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2769 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2770 del req
.headers
['Ytdl-socks-proxy']
2772 return self
.do_open(functools
.partial(
2773 _create_http_connection
, self
, conn_class
, False),
2781 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2783 return zlib
.decompress(data
)
2785 def http_request(self
, req
):
2786 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2787 # always respected by websites, some tend to give out URLs with non percent-encoded
2788 # non-ASCII characters (see telemb.py, ard.py [#3412])
2789 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2790 # To work around aforementioned issue we will replace request's original URL with
2791 # percent-encoded one
2792 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2793 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2794 url
= req
.get_full_url()
2795 url_escaped
= escape_url(url
)
2797 # Substitute URL if any change after escaping
2798 if url
!= url_escaped
:
2799 req
= update_Request(req
, url
=url_escaped
)
2801 for h
, v
in std_headers
.items():
2802 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2803 # The dict keys are capitalized because of this bug by urllib
2804 if h
.capitalize() not in req
.headers
:
2805 req
.add_header(h
, v
)
2807 req
.headers
= handle_youtubedl_headers(req
.headers
)
2809 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2810 # Python 2.6 is brain-dead when it comes to fragments
2811 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2812 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2816 def http_response(self
, req
, resp
):
2819 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2820 content
= resp
.read()
2821 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2823 uncompressed
= io
.BytesIO(gz
.read())
2824 except IOError as original_ioerror
:
2825 # There may be junk add the end of the file
2826 # See http://stackoverflow.com/q/4928560/35070 for details
2827 for i
in range(1, 1024):
2829 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2830 uncompressed
= io
.BytesIO(gz
.read())
2835 raise original_ioerror
2836 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2837 resp
.msg
= old_resp
.msg
2838 del resp
.headers
['Content-encoding']
2840 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2841 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2842 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2843 resp
.msg
= old_resp
.msg
2844 del resp
.headers
['Content-encoding']
2845 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2846 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2847 if 300 <= resp
.code
< 400:
2848 location
= resp
.headers
.get('Location')
2850 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2851 if sys
.version_info
>= (3, 0):
2852 location
= location
.encode('iso-8859-1').decode('utf-8')
2854 location
= location
.decode('utf-8')
2855 location_escaped
= escape_url(location
)
2856 if location
!= location_escaped
:
2857 del resp
.headers
['Location']
2858 if sys
.version_info
< (3, 0):
2859 location_escaped
= location_escaped
.encode('utf-8')
2860 resp
.headers
['Location'] = location_escaped
2863 https_request
= http_request
2864 https_response
= http_response
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through the
    SOCKS proxy described by the URL *socks_proxy* (socks4/4a/5 schemes)."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # NOTE(review): empty-string guard reconstructed — verify upstream.
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the proxied socket in TLS after connecting.
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS counterpart of YoutubeDLHandler; supports a custom connection
    class and the internal 'Ytdl-socks-proxy' header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        # Propagate TLS settings of the underlying handler to do_open().
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a Netscape cookie line.
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the '#HttpOnly_' marker so the stdlib parser accepts it.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that mirrors the stdlib handler while keeping a hook
    for the historical Python 2 Set-Cookie escaping workaround."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Route every redirect-capable status code through the stdlib 302 logic.
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timedelta, remaining_date_str); the delta is zero when no
    offset (or only 'Z') is present.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        # Trim the matched designator off the date string.
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            # Bare 'Z' means UTC.
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime('%S') does not understand fractional seconds - drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        # Unparsable input falls through and yields None.
        pass
def date_formats(day_first=True):
    """Select the strptime patterns matching the expected day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser for e-mail style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date string into a UNIX timestamp (None on failure)."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker shifts the parsed hour by 12.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Last resort: RFC 2822 parsing.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Drop the query string, then take whatever follows the last dot.
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle file name by swapping the media extension for '<lang>.<format>'."""
    sub_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, sub_ext, expected_real_ext)
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Resolve the base date recursively, then apply the signed offset.
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # timedelta has no month/year units; use calendar arithmetic.
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months."""
    # Work with a zero-based month index so plain div/mod handles year carry.
    month_index = dt.month - 1 + months
    new_year = dt.year + month_index // 12
    new_month = month_index % 12 + 1
    # Clamp the day so e.g. Jan 31 + 1 month lands on the last day of Feb.
    new_day = min(dt.day, calendar.monthrange(new_year, new_month)[1])
    return dt.replace(new_year, new_month, new_day)
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    # Seconds per supported rounding unit; unknown precision raises KeyError.
    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]

    def round_to(value, unit):
        # Round half-up to the nearest multiple of *unit*.
        return ((value + unit / 2) // unit) * unit

    epoch_seconds = calendar.timegm(dt.timetuple())
    return datetime.datetime.utcfromtimestamp(round_to(epoch_seconds, step))
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        # Not an 8-digit date: hand the input back untouched.
        return date_str
    return '-'.join(m.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            # Open-ended start: earliest representable date.
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            # Open-ended end: latest representable date.
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        # Python 2 may return bytes; decode with the locale's encoding.
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map file descriptors to the corresponding GetStdHandle IDs.
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is a real console only if it is a character device
        # and GetConsoleMode succeeds on it.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 BMP characters per call; a single non-BMP
        # character is written as its two UTF-16 surrogates.
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (default stderr), coping with byte
    streams, Windows consoles and missing stream encodings."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the native console API so non-ANSI characters survive.
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
def bytes_to_intlist(bs):
    """Return the byte values of *bs* as a list of ints ([] for falsy input)."""
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3 but 1-char strs on Python 2.
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(ch) for ch in bs]
def intlist_to_bytes(xs):
    """Pack a sequence of byte values (ints 0-255) into a bytes object."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Win32 OVERLAPPED structure used by LockFileEx/UnlockFileEx.
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file (low/high 32-bit halves of the byte count).
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
class locked_file(object):
    """Context manager wrapping io.open() with an advisory file lock
    (shared for 'r', exclusive for 'a'/'w')."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Don't leak the handle if locking failed.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Name of the filesystem encoding, defaulting to 'utf-8' when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
def shell_quote(args):
    """Join *args* into one shell-safe command-line string."""
    fs_encoding = get_filesystem_encoding()
    quoted = []
    for arg in args:
        if isinstance(arg, bytes):
            # We may get a filename encoded with 'encodeFilename'
            arg = arg.decode(fs_encoding)
        quoted.append(compat_shlex_quote(arg))
    return ' '.join(quoted)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge any data already smuggled into the URL so nothing is lost.
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url() back into (url, data);
    returns (smug_url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Human-readable size string, e.g. 1536 -> '1.50KiB' ('N/A' for None)."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        # log(0) is undefined; zero bytes stay in the 'B' bucket.
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number> <unit>' in *s* using the multipliers in
    *unit_table*; returns an int, or None when nothing matches."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    # Accept ',' as a decimal separator as well as '.'.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
def parse_filesize(s):
    """Parse a human-readable size like '5.6 MiB' into a byte count (None on failure)."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # NOTE(review): table entries outside this excerpt reconstructed — verify upstream.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3754 if re
.match(r
'^[\d,.]+$', s
):
3755 return str_to_int(s
)
3766 return lookup_unit_table(_UNIT_TABLE
, s
)
def parse_resolution(s):
    """Parse width/height out of a '1920x1080', '720p' or '4K' style string."""
    if s is None:
        return {}

    # Explicit WxH (also '×' and ',' separators).
    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}

    # Scanline notation such as '1080p' / '480i'.
    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    # '4K' / '8K' marketing labels (4K == 2160 lines).
    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

    return {}
def parse_bitrate(s):
    """Extract an integer kbps value from *s*; None for non-strings or no match."""
    if not isinstance(s, compat_str):
        return
    match = re.search(r'\b(\d+)\s*kbps', s)
    if match:
        return int(match.group(1))
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in names:
        return names.index(name) + 1
    return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    if abbrev in abbreviations:
        return abbreviations.index(abbrev) + 1
    return None
def fix_xml_ampersands(xml_str):
    """Escape every raw '&' in *xml_str* as '&amp;', leaving existing
    entities and character references untouched."""
    bare_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_amp, '&amp;', xml_str)
def setproctitle(title):
    """Best-effort: set the process title via glibc prctl(PR_SET_NAME)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Return *s* without the prefix *start*; *s* unchanged (incl. None) otherwise."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Return *s* without the suffix *end*; *s* unchanged (incl. None) otherwise."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    # Only strip when the first and last characters are the same quote.
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
def get_domain(url):
    """Extract the bare domain (scheme and leading 'www.' stripped) or None."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not m:
        return None
    return m.group('domain')
def url_basename(url):
    """Last path component of *url* ('' when the path is empty)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').rpartition('/')[2]
3882 return re
.match(r
'https?://[^?#&]+/', url
).group()
def urljoin(base, path):
    """Join *base* and *path* like urlparse.urljoin, but tolerate bytes input
    and return None when either part is unusable."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Already absolute (scheme-relative counts too)?
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always issues an HTTP HEAD."""
    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    """Request subclass that always issues an HTTP PUT."""
    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (optionally reading attribute *get_attr* first),
    rescaled by invscale/scale; *default* when conversion is impossible."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # Covers None, '', and anything else int() rejects.
        return default
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    elif isinstance(int_str, compat_str):
        # Tolerate thousands separators and a leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float scaled by invscale/scale; *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Pass through genuine booleans; anything else becomes *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return ``v.strip()`` for text values, *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
def url_or_none(url):
    """Return *url* (stripped) if it looks like a supported URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string with *date_format*;
    return *default* when the input cannot be interpreted."""
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3973 def parse_duration(s
):
3974 if not isinstance(s
, compat_basestring
):
3980 days
, hours
, mins
, secs
, ms
= [None] * 5
3981 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3983 days
, hours
, mins
, secs
, ms
= m
.groups()
3988 [0-9]+\s*y(?:ears?)?\s*
3991 [0-9]+\s*m(?:onths?)?\s*
3994 [0-9]+\s*w(?:eeks?)?\s*
3997 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
4001 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
4004 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
4007 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
4010 days
, hours
, mins
, secs
, ms
= m
.groups()
4012 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
4014 hours
, mins
= m
.groups()
4020 duration
+= float(secs
)
4022 duration
+= float(mins
) * 60
4024 duration
+= float(hours
) * 60 * 60
4026 duration
+= float(days
) * 24 * 60 * 60
4028 duration
+= float(ms
)
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and does not match the actual one,
    *ext* is appended to the full filename instead of replacing it.
    """
    name, real_ext = os.path.splitext(filename)
    stem = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
    return '{0}.{1}'.format(stem, ext)
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
    except OSError:
        return False
    return exe
def _get_exe_version_output(exe, args):
    """Run *exe* with *args*, returning its combined stdout/stderr text,
    or False when the executable cannot be launched."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return out
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Pull a version string out of *output* using *version_re*;
    fall back to *unrecognized* when nothing matches."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    return unrecognized
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    out = _get_exe_version_output(exe, args)
    if not out:
        return False
    return detect_exe_version(out, version_re, unrecognized)
4091 class LazyList(collections
.abc
.Sequence
):
4092 ''' Lazy immutable list from an iterable
4093 Note that slices of a LazyList are lists and not LazyList'''
4095 class IndexError(IndexError):
4098 def __init__(self
, iterable
, *, reverse
=False, _cache
=None):
4099 self
.__iterable
= iter(iterable
)
4100 self
.__cache
= [] if _cache
is None else _cache
4101 self
.__reversed
= reverse
4105 # We need to consume the entire iterable to iterate in reverse
4106 yield from self
.exhaust()
4108 yield from self
.__cache
4109 for item
in self
.__iterable
:
4110 self
.__cache
.append(item
)
4113 def __exhaust(self
):
4114 self
.__cache
.extend(self
.__iterable
)
4115 # Discard the emptied iterable to make it pickle-able
4116 self
.__iterable
= []
4120 ''' Evaluate the entire iterable '''
4121 return self
.__exhaust
()[::-1 if self
.__reversed
else 1]
4124 def __reverse_index(x
):
4125 return None if x
is None else -(x
+ 1)
4127 def __getitem__(self
, idx
):
4128 if isinstance(idx
, slice):
4130 idx
= slice(self
.__reverse
_index
(idx
.start
), self
.__reverse
_index
(idx
.stop
), -(idx
.step
or 1))
4131 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
4132 elif isinstance(idx
, int):
4134 idx
= self
.__reverse
_index
(idx
)
4135 start
, stop
, step
= idx
, idx
, 0
4137 raise TypeError('indices must be integers or slices')
4138 if ((start
or 0) < 0 or (stop
or 0) < 0
4139 or (start
is None and step
< 0)
4140 or (stop
is None and step
> 0)):
4141 # We need to consume the entire iterable to be able to slice from the end
4142 # Obviously, never use this with infinite iterables
4145 return self
.__cache
[idx
]
4146 except IndexError as e
:
4147 raise self
.IndexError(e
) from e
4148 n
= max(start
or 0, stop
or 0) - len(self
.__cache
) + 1
4150 self
.__cache
.extend(itertools
.islice(self
.__iterable
, n
))
4152 return self
.__cache
[idx
]
4153 except IndexError as e
:
4154 raise self
.IndexError(e
) from e
4158 self
[-1] if self
.__reversed
else self
[0]
4159 except self
.IndexError:
4165 return len(self
.__cache
)
4167 def __reversed__(self
):
4168 return type(self
)(self
.__iterable
, reverse
=not self
.__reversed
, _cache
=self
.__cache
)
4171 return type(self
)(self
.__iterable
, reverse
=self
.__reversed
, _cache
=self
.__cache
)
4173 def __deepcopy__(self
, memo
):
4174 # FIXME: This is actually just a shallow copy
4176 memo
[id_
] = self
.__copy
__()
4180 # repr and str should mimic a list. So we exhaust the iterable
4181 return repr(self
.exhaust())
4184 return repr(self
.exhaust())
4189 class IndexError(IndexError):
4193 # This is only useful for tests
4194 return len(self
.getslice())
4196 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
4197 self
._pagefunc
= pagefunc
4198 self
._pagesize
= pagesize
4199 self
._use
_cache
= use_cache
4202 def getpage(self
, pagenum
):
4203 page_results
= self
._cache
.get(pagenum
)
4204 if page_results
is None:
4205 page_results
= list(self
._pagefunc
(pagenum
))
4207 self
._cache
[pagenum
] = page_results
4210 def getslice(self
, start
=0, end
=None):
4211 return list(self
._getslice
(start
, end
))
4213 def _getslice(self
, start
, end
):
4214 raise NotImplementedError('This method must be implemented by subclasses')
4216 def __getitem__(self
, idx
):
4217 # NOTE: cache must be enabled if this is used
4218 if not isinstance(idx
, int) or idx
< 0:
4219 raise TypeError('indices must be non-negative integers')
4220 entries
= self
.getslice(idx
, idx
+ 1)
4222 raise self
.IndexError()
4226 class OnDemandPagedList(PagedList
):
4227 def _getslice(self
, start
, end
):
4228 for pagenum
in itertools
.count(start
// self
._pagesize
):
4229 firstid
= pagenum
* self
._pagesize
4230 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
4231 if start
>= nextfirstid
:
4235 start
% self
._pagesize
4236 if firstid
<= start
< nextfirstid
4239 ((end
- 1) % self
._pagesize
) + 1
4240 if (end
is not None and firstid
<= end
<= nextfirstid
)
4243 page_results
= self
.getpage(pagenum
)
4244 if startv
!= 0 or endv
is not None:
4245 page_results
= page_results
[startv
:endv
]
4246 yield from page_results
4248 # A little optimization - if current page is not "full", ie. does
4249 # not contain page_size videos then we can assume that this page
4250 # is the last one - there are no more ids on further pages -
4251 # i.e. no need to query again.
4252 if len(page_results
) + startv
< self
._pagesize
:
4255 # If we got the whole page, but the next page is not interesting,
4256 # break out early as well
4257 if end
== nextfirstid
:
4261 class InAdvancePagedList(PagedList
):
4262 def __init__(self
, pagefunc
, pagecount
, pagesize
):
4263 self
._pagecount
= pagecount
4264 PagedList
.__init
__(self
, pagefunc
, pagesize
, True)
4266 def _getslice(self
, start
, end
):
4267 start_page
= start
// self
._pagesize
4269 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
4270 skip_elems
= start
- start_page
* self
._pagesize
4271 only_more
= None if end
is None else end
- start
4272 for pagenum
in range(start_page
, end_page
):
4273 page_results
= self
.getpage(pagenum
)
4275 page_results
= page_results
[skip_elems
:]
4277 if only_more
is not None:
4278 if len(page_results
) < only_more
:
4279 only_more
-= len(page_results
)
4281 yield from page_results
[:only_more
]
4283 yield from page_results
def uppercase_escape(s):
    """Decode literal ``\\UXXXXXXXX`` escape sequences embedded in *s*."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal ``\\uXXXX`` escape sequences embedded in *s*."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() cannot handle unicode input, so pre-encode there
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # IDNA-encode the host; percent-escape every other component
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
4322 return compat_parse_qs(compat_urllib_parse_urlparse(url
).query
)
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, dropping BOMs, comments and blanks."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return ASCII bytes suitable for a POST body."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url_query(url, query):
    """Merge the dict *query* into the query string of *url*."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req* with optionally replaced url/data and merged headers/query,
    preserving the HTTP method (HEAD/PUT/other) and the timeout attribute."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Serialize the dict *data* as multipart/form-data with *boundary*.

    Raises ValueError when the boundary occurs inside the encoded data.
    Returns (body_bytes, content_type_header_value).
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for k, v in data.items():
        out += b'--' + boundary.encode('ascii') + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            # A caller-supplied boundary that collides is a hard error;
            # a random one is simply regenerated.
            if has_specified_boundary:
                raise
            boundary = None

    return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up a key (or the first usable key of a list/tuple) in dict *d*.

    With multiple keys, None values are always skipped and falsy values are
    skipped unless *skip_false_values* is False.
    """
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key not in d or d[key] is None or skip_false_values and not d[key]:
                continue
            return d[key]
        return default
    return d.get(key_or_keys, default)
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to *src*, swallowing common lookup errors;
    return the first result matching *expected_type* (any type when None)."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(v, expected_type):
                return v
def merge_dicts(*dicts):
    """Merge dicts left-to-right: earlier dicts win, except that an empty
    string already stored may be replaced by a later non-empty string.
    None values are never copied."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if (k not in merged
                    or (isinstance(v, compat_str) and v
                        and isinstance(merged[k], compat_str)
                        and not merged[k])):
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* unchanged if already text, else decode it with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4478 TV_PARENTAL_GUIDELINES
= {
4488 def parse_age_limit(s
):
4490 return s
if 0 <= s
<= 21 else None
4491 if not isinstance(s
, compat_basestring
):
4493 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
4495 return int(m
.group('age'))
4498 return US_RATINGS
[s
]
4499 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
4501 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
def strip_jsonp(code):
    """Remove a JSONP wrapper (``callback({...});``) and return the payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4515 def js_to_json(code
, vars={}):
4516 # vars is a dict of var, val pairs to substitute
4517 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4518 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
4520 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
4521 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
4526 if v
in ('true', 'false', 'null'):
4528 elif v
in ('undefined', 'void 0'):
4530 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
4533 if v
[0] in ("'", '"'):
4534 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
4539 }.get(m
.group(0), m
.group(0)), v
[1:-1])
4541 for regex
, base
in INTEGER_TABLE
:
4542 im
= re
.match(regex
, v
)
4544 i
= int(im
.group(1), base
)
4545 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4552 return re
.sub(r
'''(?sx)
4553 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4554 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4555 {comment}|,(?={skip}[\]}}])|
4556 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4557 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4560 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown qualities sort below every known one
            return -1
    return q
4574 'default': '%(title)s [%(id)s].%(ext)s',
4575 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4581 'description': 'description',
4582 'annotation': 'annotations.xml',
4583 'infojson': 'info.json',
4585 'pl_thumbnail': None,
4586 'pl_description': 'description',
4587 'pl_infojson': 'info.json',
4590 # As of [1] format syntax is:
4591 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4592 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4593 STR_FORMAT_RE_TMPL
= r
'''(?x)
4594 (?<!%)(?P<prefix>(?:%%)*)
4596 (?P<has_key>\((?P<key>{0})\))?
4598 (?P<conversion>[#0\-+ ]+)?
4600 (?P<precision>\.\d+)?
4601 (?P<len_mod>[hlL])? # unused in python
4602 {1} # conversion type
4607 STR_FORMAT_TYPES
= 'diouxXeEfFgGcrs'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dot/dash-separated version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """Return True when *version* is older than *limit*; when the strings
    cannot be parsed, answer according to *assume_new*."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
4633 def ytdl_is_updateable():
4634 """ Returns if yt-dlp can be updated with -U """
4636 from .update
import is_non_updateable
4638 return not is_non_updateable()
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4655 def mimetype2ext(mt
):
4659 mt
, _
, params
= mt
.partition(';')
4664 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4665 # it's the most popular one
4666 'audio/mpeg': 'mp3',
4667 'audio/x-wav': 'wav',
4669 'audio/wave': 'wav',
4672 ext
= FULL_MAP
.get(mt
)
4678 'smptett+xml': 'tt',
4682 'x-mp4-fragmented': 'mp4',
4683 'x-ms-sami': 'sami',
4686 'x-mpegurl': 'm3u8',
4687 'vnd.apple.mpegurl': 'm3u8',
4691 'vnd.ms-sstr+xml': 'ism',
4695 'filmstrip+json': 'fs',
4699 _
, _
, subtype
= mt
.rpartition('/')
4700 ext
= SUBTYPE_MAP
.get(subtype
.lower())
4711 _
, _
, suffix
= subtype
.partition('+')
4712 ext
= SUFFIX_MAP
.get(suffix
)
4716 return subtype
.replace('+', '.')
def ext2mimetype(ext_or_url):
    """Guess the MIME type for a bare extension or a URL/filename."""
    if not ext_or_url:
        return None
    if '.' not in ext_or_url:
        # A bare extension needs a fake filename for guess_type()
        ext_or_url = f'file.{ext_or_url}'
    return mimetypes.guess_type(ext_or_url)[0]
4727 def parse_codecs(codecs_str
):
4728 # http://tools.ietf.org/html/rfc6381
4731 split_codecs
= list(filter(None, map(
4732 str.strip
, codecs_str
.strip().strip(',').split(','))))
4733 vcodec
, acodec
, hdr
= None, None, None
4734 for full_codec
in split_codecs
:
4735 parts
= full_codec
.split('.')
4736 codec
= parts
[0].replace('0', '')
4737 if codec
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
4738 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
4740 vcodec
= '.'.join(parts
[:4]) if codec
in ('vp9', 'av1') else full_codec
4741 if codec
in ('dvh1', 'dvhe'):
4743 elif codec
== 'av1' and len(parts
) > 3 and parts
[3] == '10':
4745 elif full_codec
.replace('0', '').startswith('vp9.2'):
4747 elif codec
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4751 write_string('WARNING: Unknown codec %s\n' % full_codec
, sys
.stderr
)
4752 if not vcodec
and not acodec
:
4753 if len(split_codecs
) == 2:
4755 'vcodec': split_codecs
[0],
4756 'acodec': split_codecs
[1],
4760 'vcodec': vcodec
or 'none',
4761 'acodec': acodec
or 'none',
4762 'dynamic_range': hdr
,
4767 def urlhandle_detect_ext(url_handle
):
4768 getheader
= url_handle
.headers
.get
4770 cd
= getheader('Content-Disposition')
4772 m
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
)
4774 e
= determine_ext(m
.group('filename'), default_ext
=None)
4778 return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build a base64 ``data:`` URI for *data* with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
4815 def determine_protocol(info_dict
):
4816 protocol
= info_dict
.get('protocol')
4817 if protocol
is not None:
4820 url
= sanitize_url(info_dict
['url'])
4821 if url
.startswith('rtmp'):
4823 elif url
.startswith('mms'):
4825 elif url
.startswith('rtsp'):
4828 ext
= determine_ext(url
)
4834 return compat_urllib_parse_urlparse(url
).scheme
4837 def render_table(header_row
, data
, delim
=False, extra_gap
=0, hide_empty
=False):
4838 """ Render a list of rows, each as a list of values.
4839 Text after a \t will be right aligned """
4841 return len(remove_terminal_sequences(string
).replace('\t', ''))
4843 def get_max_lens(table
):
4844 return [max(width(str(v
)) for v
in col
) for col
in zip(*table
)]
4846 def filter_using_list(row
, filterArray
):
4847 return [col
for (take
, col
) in zip(filterArray
, row
) if take
]
4850 max_lens
= get_max_lens(data
)
4851 header_row
= filter_using_list(header_row
, max_lens
)
4852 data
= [filter_using_list(row
, max_lens
) for row
in data
]
4854 table
= [header_row
] + data
4855 max_lens
= get_max_lens(table
)
4858 table
= [header_row
, [delim
* (ml
+ extra_gap
) for ml
in max_lens
]] + data
4859 table
[1][-1] = table
[1][-1][:-extra_gap
] # Remove extra_gap from end of delimiter
4861 for pos
, text
in enumerate(map(str, row
)):
4863 row
[pos
] = text
.replace('\t', ' ' * (max_lens
[pos
] - width(text
))) + ' ' * extra_gap
4865 row
[pos
] = text
+ ' ' * (max_lens
[pos
] - width(text
) + extra_gap
)
4866 ret
= '\n'.join(''.join(row
).rstrip() for row
in table
)
4870 def _match_one(filter_part
, dct
, incomplete
):
4871 # TODO: Generalize code with YoutubeDL._build_format_filter
4872 STRING_OPERATORS
= {
4873 '*=': operator
.contains
,
4874 '^=': lambda attr
, value
: attr
.startswith(value
),
4875 '$=': lambda attr
, value
: attr
.endswith(value
),
4876 '~=': lambda attr
, value
: re
.search(value
, attr
),
4878 COMPARISON_OPERATORS
= {
4880 '<=': operator
.le
, # "<=" must be defined above "<"
4887 operator_rex
= re
.compile(r
'''(?x)\s*
4889 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4891 (?P<quote>["\'])(?P
<quotedstrval
>.+?
)(?P
=quote
)|
4895 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4896 m = operator_rex.search(filter_part)
4899 unnegated_op = COMPARISON_OPERATORS[m['op']]
4901 op = lambda attr, value: not unnegated_op(attr, value)
4904 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4906 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4907 actual_value = dct.get(m['key'])
4908 numeric_comparison = None
4909 if isinstance(actual_value, compat_numeric_types):
4910 # If the original field is a string and matching comparisonvalue is
4911 # a number we should respect the origin of the original field
4912 # and process comparison value as a string (see
4913 # https://github.com/ytdl-org/youtube-dl/issues/11082)
4915 numeric_comparison = int(comparison_value)
4917 numeric_comparison = parse_filesize(comparison_value)
4918 if numeric_comparison is None:
4919 numeric_comparison = parse_filesize(f'{comparison_value}B')
4920 if numeric_comparison is None:
4921 numeric_comparison = parse_duration(comparison_value)
4922 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4923 raise ValueError('Operator %s only supports string values!' % m['op'])
4924 if actual_value is None:
4925 return incomplete or m['none_inclusive']
4926 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
4929 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4930 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4932 operator_rex = re.compile(r'''(?x
)\s
*
4933 (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+)
4935 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4936 m = operator_rex.search(filter_part)
4938 op = UNARY_OPERATORS[m.group('op')]
4939 actual_value = dct.get(m.group('key'))
4940 if incomplete and actual_value is None:
4942 return op(actual_value)
4944 raise ValueError('Invalid filter part %r' % filter_part)
4947 def match_str(filter_str, dct, incomplete=False):
4948 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4949 When incomplete, all conditions passes on missing fields
4952 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
4953 for filter_part in re.split(r'(?<!\\)&', filter_str))
4956 def match_filter_func(filter_str):
4957 def _match_func(info_dict, *args, **kwargs):
4958 if match_str(filter_str, info_dict, *args, **kwargs):
4961 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4962 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.5s' or 'HH:MM:SS[.f]') to seconds;
    None for empty/unparseable input."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format *seconds* as an SRT timecode (HH:MM:SS,mmm)."""
    timecode = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % tuple(timecode)
def ass_subtitles_timecode(seconds):
    """Format *seconds* as an ASS timecode (H:MM:SS.cc — centiseconds)."""
    t = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (t[0], t[1], t[2], t[3] / 10)
4988 def dfxp2srt(dfxp_data):
4990 @param dfxp_data A
bytes-like
object containing DFXP data
4991 @returns A
unicode object containing converted SRT data
4993 LEGACY_NAMESPACES = (
4994 (b'http://www.w3.org/ns/ttml', [
4995 b'http://www.w3.org/2004/11/ttaf1',
4996 b'http://www.w3.org/2006/04/ttaf1',
4997 b'http://www.w3.org/2006/10/ttaf1',
4999 (b'http://www.w3.org/ns/ttml#styling', [
5000 b'http://www.w3.org/ns/ttml#style',
5004 SUPPORTED_STYLING = [
5013 _x = functools.partial(xpath_with_ns, ns_map={
5014 'xml': 'http://www.w3.org/XML/1998/namespace',
5015 'ttml': 'http://www.w3.org/ns/ttml',
5016 'tts': 'http://www.w3.org/ns/ttml#styling',
5022 class TTMLPElementParser(object):
5024 _unclosed_elements = []
5025 _applied_styles = []
5027 def start(self, tag, attrib):
5028 if tag in (_x('ttml:br'), 'br'):
5031 unclosed_elements = []
5033 element_style_id = attrib.get('style')
5035 style.update(default_style)
5036 if element_style_id:
5037 style.update(styles.get(element_style_id, {}))
5038 for prop in SUPPORTED_STYLING:
5039 prop_val = attrib.get(_x('tts:' + prop))
5041 style[prop] = prop_val
5044 for k, v in sorted(style.items()):
5045 if self._applied_styles and self._applied_styles[-1].get(k) == v:
5048 font += ' color="%s"' % v
5049 elif k == 'fontSize':
5050 font += ' size="%s"' % v
5051 elif k == 'fontFamily':
5052 font += ' face="%s"' % v
5053 elif k == 'fontWeight' and v == 'bold':
5055 unclosed_elements.append('b')
5056 elif k == 'fontStyle' and v == 'italic':
5058 unclosed_elements.append('i')
5059 elif k == 'textDecoration' and v == 'underline':
5061 unclosed_elements.append('u')
5063 self._out += '<font' + font + '>'
5064 unclosed_elements.append('font')
5066 if self._applied_styles:
5067 applied_style.update(self._applied_styles[-1])
5068 applied_style.update(style)
5069 self._applied_styles.append(applied_style)
5070 self._unclosed_elements.append(unclosed_elements)
5073 if tag not in (_x('ttml:br'), 'br'):
5074 unclosed_elements = self._unclosed_elements.pop()
5075 for element in reversed(unclosed_elements):
5076 self._out += '</%s>' % element
5077 if unclosed_elements and self._applied_styles:
5078 self._applied_styles.pop()
5080 def data(self, data):
5084 return self._out.strip()
5086 def parse_node(node):
5087 target = TTMLPElementParser()
5088 parser = xml.etree.ElementTree.XMLParser(target=target)
5089 parser.feed(xml.etree.ElementTree.tostring(node))
5090 return parser.close()
5092 for k, v in LEGACY_NAMESPACES:
5094 dfxp_data = dfxp_data.replace(ns, k)
5096 dfxp = compat_etree_fromstring(dfxp_data)
5098 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
5101 raise ValueError('Invalid dfxp/TTML subtitle')
5105 for style in dfxp.findall(_x('.//ttml:style')):
5106 style_id = style.get('id') or style.get(_x('xml:id'))
5109 parent_style_id = style.get('style')
5111 if parent_style_id not in styles:
5114 styles[style_id] = styles[parent_style_id].copy()
5115 for prop in SUPPORTED_STYLING:
5116 prop_val = style.get(_x('tts:' + prop))
5118 styles.setdefault(style_id, {})[prop] = prop_val
5124 for p in ('body', 'div'):
5125 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5128 style = styles.get(ele.get('style'))
5131 default_style.update(style)
5133 for para, index in zip(paras, itertools.count(1)):
5134 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
5135 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
5136 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5137 if begin_time is None:
5142 end_time = begin_time + dur
5143 out.append('%d\n%s --> %s\n%s\n\n' % (
5145 srt_subtitles_timecode(begin_time),
5146 srt_subtitles_timecode(end_time),
def cli_option(params, command_option, param):
    """Return ``[command_option, value]`` when params[param] is set, else []."""
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Translate a boolean option in *params* into CLI arguments.

    With *separator*, a single 'opt<sep>value' token is emitted; otherwise the
    option and its value are separate tokens. Missing params yield [].
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit ``[command_option]`` iff params[param] equals *expected_value*."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5174 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
5175 if isinstance(argdict, (list, tuple)): # for backward compatibility
5182 assert isinstance(argdict, dict)
5184 assert isinstance(keys, (list, tuple))
5185 for key_list in keys:
5186 arg_list = list(filter(
5187 lambda x: x is not None,
5188 [argdict.get(key.lower()) for key in variadic(key_list)]))
5190 return [arg for args in arg_list for arg in args]
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Expand (main_key, exe) into the prioritized key list consulted by
    cli_configuration_args(); e.g. 'downloader+ffmpeg', then ('downloader',
    'ffmpeg'), then 'default'."""
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{k}' for k in (keys or [''])]
    if root_key in keys:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    else:
        # NOTE(review): reconstructed from upstream -- when no plain root key
        # is requested, legacy list/tuple argdicts are not honoured.
        use_compat = False
    return cli_configuration_args(argdict, keys, default, use_compat)
5207 class ISO639Utils(object):
5208 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5267 'iw': 'heb', # Replaced by he in 1989 revision
5277 'in': 'ind', # Replaced by id in 1989 revision
5392 'ji': 'yid', # Replaced by yi in 1989 revision
    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T.

        Only the first two characters of *code* are used; returns None for
        unknown codes.
        """
        return cls._lang_map.get(code[:2])
    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1.

        Linear reverse lookup over _lang_map; implicitly returns None when no
        entry matches.
        """
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5412 class ISO3166Utils(object):
5413 # From http://data.okfn.org/data/core/country-list
5415 'AF': 'Afghanistan',
5416 'AX': 'Åland Islands',
5419 'AS': 'American Samoa',
5424 'AG': 'Antigua and Barbuda',
5441 'BO': 'Bolivia, Plurinational State of',
5442 'BQ': 'Bonaire, Sint Eustatius and Saba',
5443 'BA': 'Bosnia and Herzegovina',
5445 'BV': 'Bouvet Island',
5447 'IO': 'British Indian Ocean Territory',
5448 'BN': 'Brunei Darussalam',
5450 'BF': 'Burkina Faso',
5456 'KY': 'Cayman Islands',
5457 'CF': 'Central African Republic',
5461 'CX': 'Christmas Island',
5462 'CC': 'Cocos (Keeling) Islands',
5466 'CD': 'Congo, the Democratic Republic of the',
5467 'CK': 'Cook Islands',
5469 'CI': 'Côte d\'Ivoire',
5474 'CZ': 'Czech Republic',
5478 'DO': 'Dominican Republic',
5481 'SV': 'El Salvador',
5482 'GQ': 'Equatorial Guinea',
5486 'FK': 'Falkland Islands (Malvinas)',
5487 'FO': 'Faroe Islands',
5491 'GF': 'French Guiana',
5492 'PF': 'French Polynesia',
5493 'TF': 'French Southern Territories',
5508 'GW': 'Guinea-Bissau',
5511 'HM': 'Heard Island and McDonald Islands',
5512 'VA': 'Holy See (Vatican City State)',
5519 'IR': 'Iran, Islamic Republic of',
5522 'IM': 'Isle of Man',
5532 'KP': 'Korea, Democratic People\'s Republic of',
5533 'KR': 'Korea, Republic of',
5536 'LA': 'Lao People\'s Democratic Republic',
5542 'LI': 'Liechtenstein',
5546 'MK': 'Macedonia, the Former Yugoslav Republic of',
5553 'MH': 'Marshall Islands',
5559 'FM': 'Micronesia, Federated States of',
5560 'MD': 'Moldova, Republic of',
5571 'NL': 'Netherlands',
5572 'NC': 'New Caledonia',
5573 'NZ': 'New Zealand',
5578 'NF': 'Norfolk Island',
5579 'MP': 'Northern Mariana Islands',
5584 'PS': 'Palestine, State of',
5586 'PG': 'Papua New Guinea',
5589 'PH': 'Philippines',
5593 'PR': 'Puerto Rico',
5597 'RU': 'Russian Federation',
5599 'BL': 'Saint Barthélemy',
5600 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5601 'KN': 'Saint Kitts and Nevis',
5602 'LC': 'Saint Lucia',
5603 'MF': 'Saint Martin (French part)',
5604 'PM': 'Saint Pierre and Miquelon',
5605 'VC': 'Saint Vincent and the Grenadines',
5608 'ST': 'Sao Tome and Principe',
5609 'SA': 'Saudi Arabia',
5613 'SL': 'Sierra Leone',
5615 'SX': 'Sint Maarten (Dutch part)',
5618 'SB': 'Solomon Islands',
5620 'ZA': 'South Africa',
5621 'GS': 'South Georgia and the South Sandwich Islands',
5622 'SS': 'South Sudan',
5627 'SJ': 'Svalbard and Jan Mayen',
5630 'CH': 'Switzerland',
5631 'SY': 'Syrian Arab Republic',
5632 'TW': 'Taiwan, Province of China',
5634 'TZ': 'Tanzania, United Republic of',
5636 'TL': 'Timor-Leste',
5640 'TT': 'Trinidad and Tobago',
5643 'TM': 'Turkmenistan',
5644 'TC': 'Turks and Caicos Islands',
5648 'AE': 'United Arab Emirates',
5649 'GB': 'United Kingdom',
5650 'US': 'United States',
5651 'UM': 'United States Minor Outlying Islands',
5655 'VE': 'Venezuela, Bolivarian Republic of',
5657 'VG': 'Virgin Islands, British',
5658 'VI': 'Virgin Islands, U.S.',
5659 'WF': 'Wallis and Futuna',
5660 'EH': 'Western Sahara',
    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name.

        Case-insensitive; returns None for unknown codes.
        """
        return cls._country_map.get(code.upper())
5672 class GeoUtils(object):
5673 # Major IPv4 address blocks per country
5675 'AD': '46.172.224.0/19',
5676 'AE': '94.200.0.0/13',
5677 'AF': '149.54.0.0/17',
5678 'AG': '209.59.64.0/18',
5679 'AI': '204.14.248.0/21',
5680 'AL': '46.99.0.0/16',
5681 'AM': '46.70.0.0/15',
5682 'AO': '105.168.0.0/13',
5683 'AP': '182.50.184.0/21',
5684 'AQ': '23.154.160.0/24',
5685 'AR': '181.0.0.0/12',
5686 'AS': '202.70.112.0/20',
5687 'AT': '77.116.0.0/14',
5688 'AU': '1.128.0.0/11',
5689 'AW': '181.41.0.0/18',
5690 'AX': '185.217.4.0/22',
5691 'AZ': '5.197.0.0/16',
5692 'BA': '31.176.128.0/17',
5693 'BB': '65.48.128.0/17',
5694 'BD': '114.130.0.0/16',
5696 'BF': '102.178.0.0/15',
5697 'BG': '95.42.0.0/15',
5698 'BH': '37.131.0.0/17',
5699 'BI': '154.117.192.0/18',
5700 'BJ': '137.255.0.0/16',
5701 'BL': '185.212.72.0/23',
5702 'BM': '196.12.64.0/18',
5703 'BN': '156.31.0.0/16',
5704 'BO': '161.56.0.0/16',
5705 'BQ': '161.0.80.0/20',
5706 'BR': '191.128.0.0/12',
5707 'BS': '24.51.64.0/18',
5708 'BT': '119.2.96.0/19',
5709 'BW': '168.167.0.0/16',
5710 'BY': '178.120.0.0/13',
5711 'BZ': '179.42.192.0/18',
5712 'CA': '99.224.0.0/11',
5713 'CD': '41.243.0.0/16',
5714 'CF': '197.242.176.0/21',
5715 'CG': '160.113.0.0/16',
5716 'CH': '85.0.0.0/13',
5717 'CI': '102.136.0.0/14',
5718 'CK': '202.65.32.0/19',
5719 'CL': '152.172.0.0/14',
5720 'CM': '102.244.0.0/14',
5721 'CN': '36.128.0.0/10',
5722 'CO': '181.240.0.0/12',
5723 'CR': '201.192.0.0/12',
5724 'CU': '152.206.0.0/15',
5725 'CV': '165.90.96.0/19',
5726 'CW': '190.88.128.0/17',
5727 'CY': '31.153.0.0/16',
5728 'CZ': '88.100.0.0/14',
5730 'DJ': '197.241.0.0/17',
5731 'DK': '87.48.0.0/12',
5732 'DM': '192.243.48.0/20',
5733 'DO': '152.166.0.0/15',
5734 'DZ': '41.96.0.0/12',
5735 'EC': '186.68.0.0/15',
5736 'EE': '90.190.0.0/15',
5737 'EG': '156.160.0.0/11',
5738 'ER': '196.200.96.0/20',
5739 'ES': '88.0.0.0/11',
5740 'ET': '196.188.0.0/14',
5741 'EU': '2.16.0.0/13',
5742 'FI': '91.152.0.0/13',
5743 'FJ': '144.120.0.0/16',
5744 'FK': '80.73.208.0/21',
5745 'FM': '119.252.112.0/20',
5746 'FO': '88.85.32.0/19',
5748 'GA': '41.158.0.0/15',
5750 'GD': '74.122.88.0/21',
5751 'GE': '31.146.0.0/16',
5752 'GF': '161.22.64.0/18',
5753 'GG': '62.68.160.0/19',
5754 'GH': '154.160.0.0/12',
5755 'GI': '95.164.0.0/16',
5756 'GL': '88.83.0.0/19',
5757 'GM': '160.182.0.0/15',
5758 'GN': '197.149.192.0/18',
5759 'GP': '104.250.0.0/19',
5760 'GQ': '105.235.224.0/20',
5761 'GR': '94.64.0.0/13',
5762 'GT': '168.234.0.0/16',
5763 'GU': '168.123.0.0/16',
5764 'GW': '197.214.80.0/20',
5765 'GY': '181.41.64.0/18',
5766 'HK': '113.252.0.0/14',
5767 'HN': '181.210.0.0/16',
5768 'HR': '93.136.0.0/13',
5769 'HT': '148.102.128.0/17',
5770 'HU': '84.0.0.0/14',
5771 'ID': '39.192.0.0/10',
5772 'IE': '87.32.0.0/12',
5773 'IL': '79.176.0.0/13',
5774 'IM': '5.62.80.0/20',
5775 'IN': '117.192.0.0/10',
5776 'IO': '203.83.48.0/21',
5777 'IQ': '37.236.0.0/14',
5778 'IR': '2.176.0.0/12',
5779 'IS': '82.221.0.0/16',
5780 'IT': '79.0.0.0/10',
5781 'JE': '87.244.64.0/18',
5782 'JM': '72.27.0.0/17',
5783 'JO': '176.29.0.0/16',
5784 'JP': '133.0.0.0/8',
5785 'KE': '105.48.0.0/12',
5786 'KG': '158.181.128.0/17',
5787 'KH': '36.37.128.0/17',
5788 'KI': '103.25.140.0/22',
5789 'KM': '197.255.224.0/20',
5790 'KN': '198.167.192.0/19',
5791 'KP': '175.45.176.0/22',
5792 'KR': '175.192.0.0/10',
5793 'KW': '37.36.0.0/14',
5794 'KY': '64.96.0.0/15',
5795 'KZ': '2.72.0.0/13',
5796 'LA': '115.84.64.0/18',
5797 'LB': '178.135.0.0/16',
5798 'LC': '24.92.144.0/20',
5799 'LI': '82.117.0.0/19',
5800 'LK': '112.134.0.0/15',
5801 'LR': '102.183.0.0/16',
5802 'LS': '129.232.0.0/17',
5803 'LT': '78.56.0.0/13',
5804 'LU': '188.42.0.0/16',
5805 'LV': '46.109.0.0/16',
5806 'LY': '41.252.0.0/14',
5807 'MA': '105.128.0.0/11',
5808 'MC': '88.209.64.0/18',
5809 'MD': '37.246.0.0/16',
5810 'ME': '178.175.0.0/17',
5811 'MF': '74.112.232.0/21',
5812 'MG': '154.126.0.0/17',
5813 'MH': '117.103.88.0/21',
5814 'MK': '77.28.0.0/15',
5815 'ML': '154.118.128.0/18',
5816 'MM': '37.111.0.0/17',
5817 'MN': '49.0.128.0/17',
5818 'MO': '60.246.0.0/16',
5819 'MP': '202.88.64.0/20',
5820 'MQ': '109.203.224.0/19',
5821 'MR': '41.188.64.0/18',
5822 'MS': '208.90.112.0/22',
5823 'MT': '46.11.0.0/16',
5824 'MU': '105.16.0.0/12',
5825 'MV': '27.114.128.0/18',
5826 'MW': '102.70.0.0/15',
5827 'MX': '187.192.0.0/11',
5828 'MY': '175.136.0.0/13',
5829 'MZ': '197.218.0.0/15',
5830 'NA': '41.182.0.0/16',
5831 'NC': '101.101.0.0/18',
5832 'NE': '197.214.0.0/18',
5833 'NF': '203.17.240.0/22',
5834 'NG': '105.112.0.0/12',
5835 'NI': '186.76.0.0/15',
5836 'NL': '145.96.0.0/11',
5837 'NO': '84.208.0.0/13',
5838 'NP': '36.252.0.0/15',
5839 'NR': '203.98.224.0/19',
5840 'NU': '49.156.48.0/22',
5841 'NZ': '49.224.0.0/14',
5842 'OM': '5.36.0.0/15',
5843 'PA': '186.72.0.0/15',
5844 'PE': '186.160.0.0/14',
5845 'PF': '123.50.64.0/18',
5846 'PG': '124.240.192.0/19',
5847 'PH': '49.144.0.0/13',
5848 'PK': '39.32.0.0/11',
5849 'PL': '83.0.0.0/11',
5850 'PM': '70.36.0.0/20',
5851 'PR': '66.50.0.0/16',
5852 'PS': '188.161.0.0/16',
5853 'PT': '85.240.0.0/13',
5854 'PW': '202.124.224.0/20',
5855 'PY': '181.120.0.0/14',
5856 'QA': '37.210.0.0/15',
5857 'RE': '102.35.0.0/16',
5858 'RO': '79.112.0.0/13',
5859 'RS': '93.86.0.0/15',
5860 'RU': '5.136.0.0/13',
5861 'RW': '41.186.0.0/16',
5862 'SA': '188.48.0.0/13',
5863 'SB': '202.1.160.0/19',
5864 'SC': '154.192.0.0/11',
5865 'SD': '102.120.0.0/13',
5866 'SE': '78.64.0.0/12',
5867 'SG': '8.128.0.0/10',
5868 'SI': '188.196.0.0/14',
5869 'SK': '78.98.0.0/15',
5870 'SL': '102.143.0.0/17',
5871 'SM': '89.186.32.0/19',
5872 'SN': '41.82.0.0/15',
5873 'SO': '154.115.192.0/18',
5874 'SR': '186.179.128.0/17',
5875 'SS': '105.235.208.0/21',
5876 'ST': '197.159.160.0/19',
5877 'SV': '168.243.0.0/16',
5878 'SX': '190.102.0.0/20',
5880 'SZ': '41.84.224.0/19',
5881 'TC': '65.255.48.0/20',
5882 'TD': '154.68.128.0/19',
5883 'TG': '196.168.0.0/14',
5884 'TH': '171.96.0.0/13',
5885 'TJ': '85.9.128.0/18',
5886 'TK': '27.96.24.0/21',
5887 'TL': '180.189.160.0/20',
5888 'TM': '95.85.96.0/19',
5889 'TN': '197.0.0.0/11',
5890 'TO': '175.176.144.0/21',
5891 'TR': '78.160.0.0/11',
5892 'TT': '186.44.0.0/15',
5893 'TV': '202.2.96.0/19',
5894 'TW': '120.96.0.0/11',
5895 'TZ': '156.156.0.0/14',
5896 'UA': '37.52.0.0/14',
5897 'UG': '102.80.0.0/13',
5899 'UY': '167.56.0.0/13',
5900 'UZ': '84.54.64.0/18',
5901 'VA': '212.77.0.0/19',
5902 'VC': '207.191.240.0/21',
5903 'VE': '186.88.0.0/13',
5904 'VG': '66.81.192.0/20',
5905 'VI': '146.226.0.0/16',
5906 'VN': '14.160.0.0/11',
5907 'VU': '202.80.32.0/20',
5908 'WF': '117.20.32.0/21',
5909 'WS': '202.4.32.0/19',
5910 'YE': '134.35.0.0/16',
5911 'YT': '41.242.116.0/22',
5912 'ZA': '41.0.0.0/11',
5913 'ZM': '102.144.0.0/13',
5914 'ZW': '102.177.192.0/18',
    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (str) inside a country's major block.

        *code_or_block* is either a 2-letter country code (looked up in
        _country_ip_map; returns None if unknown) or a CIDR string 'a.b.c.d/n'.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Fill the host bits (32 - preflen low bits) with random values.
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers: force proxy_open to be consulted for both
        # schemes even when no proxy is configured ('__noproxy__' sentinel).
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the handler-level one.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS is signalled via a header instead of urllib's proxy chain.
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5957 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5958 # released into Public Domain
5959 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # Replaces the hand-rolled struct-pack/strip loop (adapted from PyCrypto)
    # with int.to_bytes, which produces identical output for n >= 0.
    n = int(n)
    if n < 0:
        # The original loop yielded b'\x00' for non-positive n; preserve that.
        n = 0
    length = max(1, (n.bit_length() + 7) // 8)
    if blocksize > 0 and length % blocksize:
        length += blocksize - length % blocksize
    return n.to_bytes(length, 'big')
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes subsumes the original zero-pad + 4-byte struct-unpack
    # accumulation loop, including the empty-input case (returns 0).
    return int.from_bytes(s, 'big')
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The reversed bytes are read as a big-endian hex number, i.e. the data
    # is interpreted little-endian.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return format(pow(payload, exponent, modulus), 'x')
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (EME-PKCS1-v1_5) requires the padding string PS to consist of
    # NONZERO octets: previously random.randint(0, 254) could emit 0, which
    # would make a decoder truncate the message at the stray zero byte.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Encode non-negative integer *num* in base *n* using *table* as the
    digit alphabet (defaults to 0-9a-zA-Z truncated to n symbols)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Decode a P.A.C.K.E.R.-style obfuscated JavaScript blob.

    The packed format stores the real code with every identifier replaced by
    its base-N index; *symbols* maps those indices back to the originals.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        # Table keys are the base-N encodings of each symbol index; an empty
        # symbol means the token IS its own encoding.
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Rotate every character of *s* found in *alphabet* by *shift* positions
    (cyclically); characters outside the alphabet pass through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(rotate(ch) for ch in s)
def rot47(s):
    """Apply the ROT47 cipher: rotate the 94 printable ASCII characters
    ('!' through '~') by 47 positions (self-inverse)."""
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="quoted"') into a dict;
    surrounding double quotes are stripped from quoted values."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift, equivalent to JavaScript's `>>>` operator."""
    if val < 0:
        # Reinterpret the negative value as its 32-bit two's complement.
        val += 0x100000000
    return val >> n
6102 # Based on png2str() written by @gdkchan and improved by @yokrysty
6103 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode an RGB (color type 2, 8-bit) PNG into (width, height, pixels),
    where pixels is a list of rows of flat R,G,B byte values."""
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed to be the first chunk (checked above).
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split across multiple IDAT chunks; concatenate them.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel (RGB, no alpha) -- assumes color type 2, bit depth 8.
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed with one filter-type byte.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours used by the reconstruction filters; 'left' is the
            # same channel of the previous pixel (3 bytes back).
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute *key* (str) to *value* (bytes) on *path*,
    trying pyxattr/xattr modules, NTFS ADS on Windows, then CLI tools."""
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the year/month/day
    (as strings) of a random date between 1950-01-01 and 1995-12-31."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    chosen = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6305 # Templates for internet shortcut files, which are plain text files.
6306 DOT_URL_LINK_TEMPLATE
= '''
6311 DOT_WEBLOC_LINK_TEMPLATE
= '''
6312 <?xml version="1.0" encoding="UTF-8"?>
6313 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6314 <plist version="1.0">
6317 \t<string>%(url)s</string>
6322 DOT_DESKTOP_LINK_TEMPLATE
= '''
6332 'url': DOT_URL_LINK_TEMPLATE
,
6333 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
6334 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """On Windows/Cygwin, prefix the absolute path with \\\\?\\ to lift the
    MAX_PATH limit; on other platforms return *path* unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # Work around MAX_PATH limitation on Windows. The maximum allowed length
    # for the individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Render a value through *template*: the value is *obj* itself (or
    obj.get(field) when *field* is given), optionally mapped by *func*;
    values in *ignore* yield *default* instead."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
def clean_podcast_url(url):
    """Strip known podcast measurement/tracking redirect prefixes from *url*."""
    # NOTE(review): parts of this alternation were reconstructed from upstream;
    # verify the full prefix list against the repository history.
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    The 'y' position carries the variant bits and must be one of 8, 9, a or b
    (RFC 4122 section 4.4); previously any hex digit could be produced there,
    yielding strings that strict UUID validators reject.
    """
    return re.sub(
        r'[xy]',
        lambda m: _HEX_TABLE[random.randint(0, 15)] if m.group(0) == 'x' else '89ab'[random.randint(0, 3)],
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    Returns True on success (or when nothing needed creating), False on
    failure; *to_screen*, when callable, receives an error message.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # BUG FIX: was `if callable(to_screen) is not None:` -- callable()
        # returns a bool, so the test was always true and calling
        # to_screen(...) crashed with TypeError when to_screen is None.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
def get_executable_path():
    """Return the absolute base path of the running program, whether running
    from a PyInstaller bundle, a zip import, or the source tree."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        # Running from source: one level above this module's package
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
def load_plugins(name, suffix, namespace):
    """Load classes ending in *suffix* from ytdlp_plugins/<name>/__init__.py
    into *namespace*; returns the dict of newly added classes (empty when the
    plugin package does not exist)."""
    classes = {}
    try:
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        # NOTE: the loop variable shadows the `name` parameter (no longer
        # needed at this point).
        for name in dir(plugins):
            if name in namespace:
                # Never overwrite an existing (built-in) name.
                continue
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes[name] = namespace[name] = klass
    except FileNotFoundError:
        # No plugin directory -- not an error.
        pass
    return classes
6468 obj
, *path_list
, default
=None, expected_type
=None, get_all
=True,
6469 casesense
=True, is_user_input
=False, traverse_string
=False):
6470 ''' Traverse nested list/dict/tuple
6471 @param path_list A list of paths which are checked one by one.
6472 Each path is a list of keys where each key is a string,
6473 a function, a tuple of strings or "...".
6474 When a fuction is given, it takes the key as argument and
6475 returns whether the key matches or not. When a tuple is given,
6476 all the keys given in the tuple are traversed, and
6477 "..." traverses all the keys in the object
6478 @param default Default value to return
6479 @param expected_type Only accept final value of this type (Can also be any callable)
6480 @param get_all Return all the values obtained from a path or only the first one
6481 @param casesense Whether to consider dictionary keys as case sensitive
6482 @param is_user_input Whether the keys are generated from user input. If True,
6483 strings are converted to int/slice if necessary
6484 @param traverse_string Whether to traverse inside strings. If True, any
6485 non-compatible object will also be converted into a string
6489 _lower
= lambda k
: (k
.lower() if isinstance(k
, str) else k
)
6490 path_list
= (map(_lower
, variadic(path
)) for path
in path_list
)
6492 def _traverse_obj(obj
, path
, _current_depth
=0):
6494 path
= tuple(variadic(path
))
6495 for i
, key
in enumerate(path
):
6498 if isinstance(key
, (list, tuple)):
6499 obj
= [_traverse_obj(obj
, sub_key
, _current_depth
) for sub_key
in key
]
6502 obj
= (obj
.values() if isinstance(obj
, dict)
6503 else obj
if isinstance(obj
, (list, tuple, LazyList
))
6504 else str(obj
) if traverse_string
else [])
6506 depth
= max(depth
, _current_depth
)
6507 return [_traverse_obj(inner_obj
, path
[i
+ 1:], _current_depth
) for inner_obj
in obj
]
6509 if isinstance(obj
, (list, tuple, LazyList
)):
6510 obj
= enumerate(obj
)
6511 elif isinstance(obj
, dict):
6514 if not traverse_string
:
6518 depth
= max(depth
, _current_depth
)
6519 return [_traverse_obj(v
, path
[i
+ 1:], _current_depth
) for k
, v
in obj
if key(k
)]
6520 elif isinstance(obj
, dict) and not (is_user_input
and key
== ':'):
6521 obj
= (obj
.get(key
) if casesense
or (key
in obj
)
6522 else next((v
for k
, v
in obj
.items() if _lower(k
) == key
), None))
6525 key
= (int_or_none(key
) if ':' not in key
6526 else slice(*map(int_or_none
, key
.split(':'))))
6527 if key
== slice(None):
6528 return _traverse_obj(obj
, (..., *path
[i
+ 1:]), _current_depth
)
6529 if not isinstance(key
, (int, slice)):
6531 if not isinstance(obj
, (list, tuple, LazyList
)):
6532 if not traverse_string
:
6541 if isinstance(expected_type
, type):
6542 type_test
= lambda val
: val
if isinstance(val
, expected_type
) else None
6543 elif expected_type
is not None:
6544 type_test
= expected_type
6546 type_test
= lambda val
: val
6548 for path
in path_list
:
6550 val
= _traverse_obj(obj
, path
)
6553 for _
in range(depth
- 1):
6554 val
= itertools
.chain
.from_iterable(v
for v
in val
if v
is not None)
6555 val
= [v
for v
in map(type_test
, val
) if v
is not None]
6557 return val
if get_all
else val
[0]
6559 val
= type_test(val
)
def traverse_dict(dictn, keys, casesense=True):
    # Deprecated backward-compatibility shim; delegates to traverse_obj with
    # the legacy user-input/string-traversal semantics.
    write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
    return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
def variadic(x, allowed_types=(str, bytes)):
    """Wrap *x* in a tuple unless it is already a non-atomic iterable.

    Strings and bytes (or whatever *allowed_types* lists) count as atomic
    values, not iterables.
    """
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6576 # create a JSON Web Signature (jws) with HS256 algorithm
6577 # the resulting format is in JWS Compact Serialization
6578 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6579 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6580 def jwt_encode_hs256(payload_data
, key
, headers
={}):
6586 header_data
.update(headers
)
6587 header_b64
= base64
.b64encode(json
.dumps(header_data
).encode('utf-8'))
6588 payload_b64
= base64
.b64encode(json
.dumps(payload_data
).encode('utf-8'))
6589 h
= hmac
.new(key
.encode('utf-8'), header_b64
+ b
'.' + payload_b64
, hashlib
.sha256
)
6590 signature_b64
= base64
.b64encode(h
.digest())
6591 token
= header_b64
+ b
'.' + payload_b64
+ b
'.' + signature_b64
6595 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode (without verifying the signature) the payload of a JWT.

    Fixed: real-world JWTs use unpadded base64url, which made
    urlsafe_b64decode raise binascii.Error; trailing '=' padding is restored
    (superfluous '=' characters are ignored by the decoder).
    """
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    payload_data = json.loads(base64.urlsafe_b64decode(payload_b64 + '==='))
    return payload_data
def supports_terminal_sequences(stream):
    """Best-effort check whether *stream* can render ANSI escape sequences."""
    if compat_os_name == 'nt':
        from .compat import WINDOWS_VT_MODE  # Must be imported locally
        # VT processing requires Windows 10 TH2 (build 10586) or newer.
        if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
            return False
    elif not os.getenv('TERM'):
        return False
    try:
        return stream.isatty()
    except BaseException:
        # Streams without isatty (or raising exotic errors) don't support it.
        return False
# Matches CSI ... m (SGR) sequences such as colors and text styles.
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Strip ANSI SGR escape sequences (colors/styles) from *string*."""
    cleaned = _terminal_sequences_re.sub('', string)
    return cleaned
def number_of_digits(number):
    """Length of *number* rendered with the '%d' format (sign included)."""
    rendered = '%d' % number
    return len(rendered)
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the truthy values (stringified) with *delim*; when *from_dict* is
    given, each value is first used as a key into that dict."""
    if from_dict is not None:
        values = (from_dict.get(key) for key in values)
    return delim.join(str(item) for item in values if item)