]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dl/compat.py
[socks] Add socks.py from @bluec0re's public domain implementation
[yt-dlp.git] / youtube_dl / compat.py
... / ...
CommitLineData
1from __future__ import unicode_literals
2
3import binascii
4import collections
5import email
6import getpass
7import io
8import optparse
9import os
10import re
11import shlex
12import shutil
13import socket
14import subprocess
15import sys
16import itertools
17import xml.etree.ElementTree
18
19
20try:
21 import urllib.request as compat_urllib_request
22except ImportError: # Python 2
23 import urllib2 as compat_urllib_request
24
25try:
26 import urllib.error as compat_urllib_error
27except ImportError: # Python 2
28 import urllib2 as compat_urllib_error
29
30try:
31 import urllib.parse as compat_urllib_parse
32except ImportError: # Python 2
33 import urllib as compat_urllib_parse
34
35try:
36 from urllib.parse import urlparse as compat_urllib_parse_urlparse
37except ImportError: # Python 2
38 from urlparse import urlparse as compat_urllib_parse_urlparse
39
40try:
41 import urllib.parse as compat_urlparse
42except ImportError: # Python 2
43 import urlparse as compat_urlparse
44
45try:
46 import urllib.response as compat_urllib_response
47except ImportError: # Python 2
48 import urllib as compat_urllib_response
49
50try:
51 import http.cookiejar as compat_cookiejar
52except ImportError: # Python 2
53 import cookielib as compat_cookiejar
54
55try:
56 import http.cookies as compat_cookies
57except ImportError: # Python 2
58 import Cookie as compat_cookies
59
60try:
61 import html.entities as compat_html_entities
62except ImportError: # Python 2
63 import htmlentitydefs as compat_html_entities
64
65try:
66 import http.client as compat_http_client
67except ImportError: # Python 2
68 import httplib as compat_http_client
69
70try:
71 from urllib.error import HTTPError as compat_HTTPError
72except ImportError: # Python 2
73 from urllib2 import HTTPError as compat_HTTPError
74
75try:
76 from urllib.request import urlretrieve as compat_urlretrieve
77except ImportError: # Python 2
78 from urllib import urlretrieve as compat_urlretrieve
79
80try:
81 from html.parser import HTMLParser as compat_HTMLParser
82except ImportError: # Python 2
83 from HTMLParser import HTMLParser as compat_HTMLParser
84
85
# compat_subprocess_get_DEVNULL() returns something that can be passed as
# stdin/stdout/stderr to subprocess calls in order to discard the stream.
try:
    from subprocess import DEVNULL
except ImportError:
    # No subprocess.DEVNULL available: hand out a fresh writable handle on the
    # platform's null device on every call.
    def compat_subprocess_get_DEVNULL():
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        return DEVNULL
91
92try:
93 import http.server as compat_http_server
94except ImportError:
95 import BaseHTTPServer as compat_http_server
96
# compat_str is the text (unicode) string type of the running interpreter.
try:
    unicode
except NameError:
    compat_str = str
else:
    compat_str = unicode  # Python 2
101
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Reuse the stdlib's pre-compiled "run of ASCII characters" pattern when
    # the module exposes one; otherwise build an equivalent pattern.
    if hasattr(compat_urllib_parse, '_asciire'):
        _asciire = compat_urllib_parse._asciire
    else:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Attribute access only: raises AttributeError for falsy values
            # that are not string-like.
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        pieces = string.split(b'%')
        if len(pieces) == 1:
            # Nothing was percent-escaped.
            return string
        hextochr = compat_urllib_parse._hextochr
        out = pieces[:1]
        for chunk in pieces[1:]:
            try:
                decoded = hextochr[chunk[:2]]
            except KeyError:
                # Not a valid two-digit hex escape: keep the text verbatim.
                out.extend((b'%', chunk))
            else:
                out.extend((decoded, chunk[2:]))
        return b''.join(out)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by the character they stand for.

        encoding and errors control how the percent-encoded byte sequences are
        decoded into text, with the meaning they have for bytes.decode().
        None selects the defaults: UTF-8 decoding, with invalid sequences
        replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Attribute access only: raises AttributeError for containers
            # that are not string-like.
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # The split alternates non-ASCII text (even indexes) with ASCII runs
        # (odd indexes); only the ASCII runs can carry %xx escapes.
        segments = _asciire.split(string)
        out = segments[:1]
        for ascii_run, other in zip(segments[1::2], segments[2::2]):
            out.append(
                compat_urllib_parse_unquote_to_bytes(ascii_run).decode(encoding, errors))
            out.append(other)
        return ''.join(out)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but plus signs are turned into spaces first, as
        needed when unquoting HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(
            string.replace('+', ' '), encoding, errors)
171
try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
except ImportError:  # Python 2
    # Python 2 will choke in urlencode on mixture of byte and unicode strings.
    # Possible solutions are to either port it from python 3 with all
    # the friends or manually ensure input query contains only byte strings.
    # We will stick with latter thus recursively encoding the whole query.
    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
        """URL-encode query after turning every text string in it into bytes.

        Dicts, lists and tuples are walked recursively (keeping their kind);
        text strings are encoded with encoding; anything else is passed
        through untouched.
        """
        def as_bytes(obj):
            if isinstance(obj, dict):
                return dict(
                    (as_bytes(key), as_bytes(val)) for key, val in obj.items())
            if isinstance(obj, (list, tuple)):
                members = [as_bytes(member) for member in obj]
                return tuple(members) if isinstance(obj, tuple) else members
            if isinstance(obj, compat_str):
                return obj.encode(encoding)
            return obj

        return compat_urllib_parse.urlencode(as_bytes(query), doseq=doseq)
197
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """Opener handler that serves ``data:`` URLs from their own content."""

        def data_open(self, req):
            """Decode the data URL carried by req and return it as a response.

            data URLs as specified in RFC 2397; POSTed data is ignored.

            syntax:
              dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
              mediatype := [ type "/" subtype ] *( ";" parameter )
              data      := *urlchar
              parameter := attribute "=" value
            """
            full_url = req.get_full_url()

            _scheme, remainder = full_url.split(':', 1)
            media, payload = remainder.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            base64_suffix = ';base64'
            if media.endswith(base64_suffix):
                payload = binascii.a2b_base64(payload)
                media = media[:-len(base64_suffix)]

            # An omitted media type has this default.
            media = media or 'text/plain;charset=US-ASCII'

            header_text = 'Content-type: %s\nContent-length: %d\n' % (
                media, len(payload))
            return compat_urllib_response.addinfourl(
                io.BytesIO(payload), email.message_from_string(header_text), full_url)
231
# compat_basestring is the common base type of all string types where the
# interpreter has one (Python 2), and plain str otherwise.
try:
    basestring
except NameError:
    compat_basestring = str
else:
    compat_basestring = basestring  # Python 2
236
# compat_chr maps a code point to a one-character text string.
try:
    unichr
except NameError:
    compat_chr = chr
else:
    compat_chr = unichr  # Python 2
241
242try:
243 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
244except ImportError: # Python 2.6
245 from xml.parsers.expat import ExpatError as compat_xml_parse_error
246
if sys.version_info[0] >= 3:
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    etree = xml.etree.ElementTree

    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield root, then every descendant, in document order.

            Stand-in for Element.iter(): the root itself must be included,
            otherwise compat_etree_fromstring() never decodes the text of the
            top-level element.
            """
            yield root
            for el in root.findall('*'):
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Parse text with parser (a default XMLParser when none is given)
        and return the value of parser.close()."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Build an Element whose byte-string attribute values are decoded
        from UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse the XML document in text and return its root element, with
        byte-string attribute values and element texts decoded from UTF-8."""
        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
284
if sys.version_info < (2, 7):
    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
    # .//node does not match if a node is a direct child of . !
    def compat_xpath(xpath):
        """Return xpath, ASCII-encoded to a byte string if it is text."""
        return xpath.encode('ascii') if isinstance(xpath, compat_str) else xpath
else:
    def compat_xpath(xpath):
        """Return xpath unchanged."""
        return xpath
294
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split the query string qs into a list of (name, value) text pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless keep_blank_values is true. With strict_parsing, a
        field without '=' raises ValueError instead of being tolerated.
        """
        def decode(component):
            # '+' stands for a space; then undo the percent-escaping.
            return compat_str(compat_urllib_parse_unquote(
                component.replace('+', ' '), encoding=encoding, errors=errors))

        result = []
        for amp_part in qs.split('&'):
            for field in amp_part.split(';'):
                if not (field or strict_parsing):
                    continue
                parts = field.split('=', 1)
                if len(parts) != 2:
                    if strict_parsing:
                        raise ValueError('bad query field: %r' % (field,))
                    # Handle case of a control-name with no equal sign
                    if not keep_blank_values:
                        continue
                    parts.append('')
                raw_name, raw_value = parts
                if keep_blank_values or len(raw_value):
                    result.append((decode(raw_name), decode(raw_value)))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse the query string qs into a dict mapping each field name to
        the list of its values, in order of appearance."""
        collected = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            collected.setdefault(name, []).append(value)
        return collected
341
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return s quoted so that a POSIX shell reads it as a single word.

        Strings made only of word characters, '-', '_', '.' and '/' are
        returned as they are; anything else (including the empty string) is
        wrapped in single quotes, embedded single quotes being written
        as '"'"'.
        """
        # \Z instead of $: `$` also matches right before a trailing newline,
        # which would let a string such as 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
350
351
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """shlex.split() that first encodes a text string s to UTF-8 bytes."""
        encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(encoded, comments, posix)
else:
    compat_shlex_split = shlex.split
361
362
def compat_ord(c):
    """Return c itself when it is exactly an int, else ord(c).

    Useful for elements taken from byte strings, which are ints on Python 3
    and one-character strings on Python 2.
    """
    return c if type(c) is int else ord(c)
368
369
# os.name is 'java' under Jython; os._name is used there instead.
if os.name == 'java':
    compat_os_name = os._name
else:
    compat_os_name = os.name
371
372
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser

    def compat_setenv(key, value, env=os.environ):
        """Store value under key in env (os.environ unless another mapping
        is passed)."""
        env[key] = value
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return the environment variable key decoded with the filesystem
        encoding, or default when it is unset. Empty or None results are
        returned without decoding."""
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    def compat_setenv(key, value, env=os.environ):
        """Store value under key in env (os.environ unless another mapping
        is passed), encoding text strings with the filesystem encoding."""
        def encode(v):
            from .utils import get_filesystem_encoding
            return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
        env[encode(key)] = encode(value)

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if compat_os_name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": the current user's home directory.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the named user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif compat_os_name == 'nt' or compat_os_name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() returns None (it never raises KeyError) when
                # HOMEDRIVE is unset; fall back to an empty drive so that
                # os.path.join() is not handed None.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
456
457
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s (asserted to be a compat_str)."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print s encoded with the preferred encoding; characters that
        cannot be encoded are written as XML character references."""
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
466
467
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:  # Python 2.6
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Arguments are those of subprocess.Popen, except that stdout is always
        captured and 'input' is not supported. Raises
        subprocess.CalledProcessError, carrying the captured output in its
        `output` attribute, when the command exits with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Popen has no `.args` attribute on the interpreters that need
            # this fallback, so recover the command from the call arguments
            # (as CPython 2.7's check_output does).
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            # CalledProcessError only accepts (returncode, cmd) there; attach
            # the output afterwards instead of passing an `output` keyword.
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
479
if sys.platform == 'win32' and sys.version_info < (3, 0):
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() that first encodes a text prompt with the
        preferred encoding."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
488
# Python < 2.6.5 require kwargs to be bytes
def _testfunc(x):
    pass


# Probe: does ** expansion accept the keys of a dict written with this
# module's string literals?
try:
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of kwargs whose keys have been passed through bytes()."""
        converted = {}
        for name, value in kwargs.items():
            converted[bytes(name)] = value
        return converted
else:
    def compat_kwargs(kwargs):
        """Return kwargs unchanged."""
        return kwargs
499
500
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to address, a (host, port) pair, and return the socket.

        Every address returned by getaddrinfo() is tried in turn, with
        timeout set on the socket and source_address bound first when given.
        If all attempts fail the last socket.error is raised.
        """
        host, port = address
        last_error = None
        for family, kind, protocol, _canonname, sockaddr in socket.getaddrinfo(
                host, port, 0, socket.SOCK_STREAM):
            sock = None
            try:
                sock = socket.socket(family, kind, protocol)
                sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sockaddr)
                return sock
            except socket.error as exc:
                # Remember the failure and release the half-opened socket
                # before moving on to the next candidate address.
                last_error = exc
                if sock is not None:
                    sock.close()
        if last_error is None:
            raise socket.error('getaddrinfo returns an empty list')
        raise last_error
525
526
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text strings.

    A throw-away option group is probed with a text option string. When that
    raises TypeError, add_option is replaced by a wrapper which ASCII-encodes
    (with 'replace') every text positional argument and keyword value before
    delegating to the original method. Otherwise nothing is changed.
    """
    probe_parser = optparse.OptionParser()
    probe_group = optparse.OptionGroup(probe_parser, 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        pass
    else:
        return

    original_add_option = optparse.OptionGroup.add_option

    def to_ascii_bytes(value):
        if isinstance(value, compat_str):
            return value.encode('ascii', 'replace')
        return value

    def _compat_add_option(self, *args, **kwargs):
        bargs = [to_ascii_bytes(arg) for arg in args]
        bkwargs = dict(
            (key, to_ascii_bytes(val)) for key, val in kwargs.items())
        return original_add_option(self, *bargs, **bkwargs)

    optparse.OptionGroup.add_option = _compat_add_option
546
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_env_dimension(name):
        """Return the environment variable name as an int, or None when it is
        unset, empty or not a valid integer."""
        raw = compat_getenv(name)
        if not raw:
            return None
        try:
            return int(raw)
        except ValueError:
            # A malformed COLUMNS/LINES must not crash the caller; treat it
            # like an unset variable so the other sources are consulted.
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a (columns, lines) named tuple.

        Positive values of the COLUMNS and LINES environment variables win.
        A dimension they do not provide is taken from `stty size`, or from
        fallback (a (columns, lines) pair) when that command fails.
        """
        columns = _terminal_env_dimension('COLUMNS')
        lines = _terminal_env_dimension('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<lines> <columns>".
                _lines, _columns = map(int, out.split())
            except Exception:
                # Best effort: any failure to query stty selects the fallback.
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
579
# itertools.count() accepts a step argument only from Python 2.7 on; probe
# for it and provide an equivalent generator where it is missing.
try:
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        current = start
        while True:
            yield current
            current += step
else:
    compat_itertools_count = itertools.count
589
590if sys.version_info >= (3, 0):
591 from tokenize import tokenize as compat_tokenize_tokenize
592else:
593 from tokenize import generate_tokens as compat_tokenize_tokenize
594
# Names exported by `from ... import *`, kept in sorted order.
__all__ = [
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_etree_fromstring',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_os_name',
    'compat_parse_qs',
    'compat_print',
    'compat_setenv',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlencode',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'compat_xpath',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]