]> jfr.im git - yt-dlp.git/blob - youtube_dl/compat.py
[utils] Unquote credentials passed to SOCKS proxies
[yt-dlp.git] / youtube_dl / compat.py
1 from __future__ import unicode_literals
2
3 import binascii
4 import collections
5 import email
6 import getpass
7 import io
8 import optparse
9 import os
10 import re
11 import shlex
12 import shutil
13 import socket
14 import struct
15 import subprocess
16 import sys
17 import itertools
18 import xml.etree.ElementTree
19
20
21 try:
22 import urllib.request as compat_urllib_request
23 except ImportError: # Python 2
24 import urllib2 as compat_urllib_request
25
26 try:
27 import urllib.error as compat_urllib_error
28 except ImportError: # Python 2
29 import urllib2 as compat_urllib_error
30
31 try:
32 import urllib.parse as compat_urllib_parse
33 except ImportError: # Python 2
34 import urllib as compat_urllib_parse
35
36 try:
37 from urllib.parse import urlparse as compat_urllib_parse_urlparse
38 except ImportError: # Python 2
39 from urlparse import urlparse as compat_urllib_parse_urlparse
40
41 try:
42 import urllib.parse as compat_urlparse
43 except ImportError: # Python 2
44 import urlparse as compat_urlparse
45
46 try:
47 import urllib.response as compat_urllib_response
48 except ImportError: # Python 2
49 import urllib as compat_urllib_response
50
51 try:
52 import http.cookiejar as compat_cookiejar
53 except ImportError: # Python 2
54 import cookielib as compat_cookiejar
55
56 try:
57 import http.cookies as compat_cookies
58 except ImportError: # Python 2
59 import Cookie as compat_cookies
60
61 try:
62 import html.entities as compat_html_entities
63 except ImportError: # Python 2
64 import htmlentitydefs as compat_html_entities
65
66 try:
67 import http.client as compat_http_client
68 except ImportError: # Python 2
69 import httplib as compat_http_client
70
71 try:
72 from urllib.error import HTTPError as compat_HTTPError
73 except ImportError: # Python 2
74 from urllib2 import HTTPError as compat_HTTPError
75
76 try:
77 from urllib.request import urlretrieve as compat_urlretrieve
78 except ImportError: # Python 2
79 from urllib import urlretrieve as compat_urlretrieve
80
81 try:
82 from html.parser import HTMLParser as compat_HTMLParser
83 except ImportError: # Python 2
84 from HTMLParser import HTMLParser as compat_HTMLParser
85
86
try:
    from subprocess import DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a freshly opened, writable handle on the null device.

        Used when ``subprocess.DEVNULL`` cannot be imported; every call
        opens ``os.path.devnull`` anew.
        """
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the ``subprocess.DEVNULL`` sentinel."""
        return DEVNULL
92
93 try:
94 import http.server as compat_http_server
95 except ImportError:
96 import BaseHTTPServer as compat_http_server
97
# Text (unicode) string type of the running interpreter.
try:
    unicode
except NameError:
    # No ``unicode`` builtin: ``str`` is used as the text type.
    compat_str = str
else:
    compat_str = unicode  # Python 2
102
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Reuse the stdlib's pre-compiled pattern when it exposes one.
    if hasattr(compat_urllib_parse, '_asciire'):
        _asciire = compat_urllib_parse._asciire
    else:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: unquote_to_bytes, unquote and unquote_plus below follow the
    # implementations in cpython 3.4.3's stdlib, because Python 2's own
    # versions are apparently broken
    # (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Text input is encoded as UTF-8 first. That only matters when it
        # holds unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Attribute access only: raises for objects that are not
            # string-like.
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        pieces = string.split(b'%')
        if len(pieces) == 1:
            return string
        hex_table = compat_urllib_parse._hextochr
        decoded = [pieces[0]]
        for piece in pieces[1:]:
            try:
                char = hex_table[piece[:2]]
            except KeyError:
                # Not a valid %xx escape: keep it verbatim.
                decoded.append(b'%')
                decoded.append(piece)
            else:
                decoded.append(char)
                decoded.append(piece[2:])
        return b''.join(decoded)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by the characters they stand for.

        ``encoding`` and ``errors`` say how percent-encoded byte sequences
        are turned into text, with the same meaning as for bytes.decode().
        When either is None it falls back to UTF-8 / 'replace', so invalid
        sequences become a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # Odd indexes hold the ASCII runs that may contain escapes; even
        # indexes hold the text in between.
        chunks = _asciire.split(string)
        out = [chunks[0]]
        for idx in range(1, len(chunks), 2):
            raw = compat_urllib_parse_unquote_to_bytes(chunks[idx])
            out.append(raw.decode(encoding, errors))
            out.append(chunks[idx + 1])
        return ''.join(out)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but plus signs are turned into spaces first, as
        needed for HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(
            string.replace('+', ' '), encoding, errors)
172
try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
except ImportError:  # Python 2
    # Python 2's urlencode chokes on a mixture of byte and unicode strings.
    # Rather than porting the Python 3 implementation with all its helpers,
    # the whole query is recursively encoded to byte strings up front.
    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
        """Encode every text string inside ``query`` using ``encoding`` and
        hand the result to the stdlib ``urlencode``."""
        def _encoded(item):
            # Dicts, lists and tuples are rebuilt with encoded members;
            # anything that is not text is passed through unchanged.
            if isinstance(item, dict):
                return dict(
                    (_encoded(key), _encoded(val)) for key, val in item.items())
            if isinstance(item, (list, tuple,)):
                members = [_encoded(member) for member in item]
                return tuple(members) if isinstance(item, tuple) else members
            if isinstance(item, compat_str):
                return item.encode(encoding)
            return item

        return compat_urllib_parse.urlencode(_encoded(query), doseq=doseq)
198
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """URL handler for ``data:`` URLs as specified in RFC 2397."""

        def data_open(self, req):
            """Build a response object from the payload embedded in the URL.

            POSTed data is ignored. Syntax handled:

                dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
                mediatype := [ type "/" subtype ] *( ";" parameter )
                data      := *urlchar
                parameter := attribute "=" value
            """
            full_url = req.get_full_url()

            _scheme, remainder = full_url.split(':', 1)
            mediatype, payload = remainder.split(',', 1)

            # Even base64 encoded data URLs might be quoted, so always
            # unquote before decoding.
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            if mediatype.endswith(';base64'):
                payload = binascii.a2b_base64(payload)
                # Strip the 7-character ';base64' marker.
                mediatype = mediatype[:-7]

            mediatype = mediatype or 'text/plain;charset=US-ASCII'

            header_text = 'Content-type: %s\nContent-length: %d\n' % (
                mediatype, len(payload))
            headers = email.message_from_string(header_text)

            return compat_urllib_response.addinfourl(
                io.BytesIO(payload), headers, full_url)
232
# Common base type for string checks.
try:
    basestring
except NameError:
    # No ``basestring`` builtin: fall back to ``str``.
    compat_basestring = str
else:
    compat_basestring = basestring  # Python 2
237
# Code point -> one-character text string.
try:
    unichr
except NameError:
    # No ``unichr`` builtin: ``chr`` is used instead.
    compat_chr = chr
else:
    compat_chr = unichr  # Python 2
242
243 try:
244 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
245 except ImportError: # Python 2.6
246 from xml.parsers.expat import ExpatError as compat_xml_parse_error
247
if sys.version_info[0] >= 3:
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    etree = xml.etree.ElementTree

    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield ``root`` and then all of its descendants, depth first.

            Mirrors ``Element.iter()``, which starts with the element
            itself; leaving the root out would skip decoding its text in
            ``compat_etree_fromstring``.
            """
            yield root
            for child in root.findall('*'):
                for sub in _etree_iter(child):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Parse ``text`` with ``parser`` (a default XMLParser when none is
        given) and return whatever the parser's ``close()`` returns."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element whose byte-string attribute values are decoded
        from UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse XML ``text`` and return the root element.

        Attribute values and element text that the parser produced as byte
        strings are decoded from UTF-8.
        """
        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
285
if sys.version_info >= (2, 7):
    def compat_xpath(xpath):
        """Return ``xpath`` unchanged."""
        return xpath
else:
    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
    # .//node does not match if a node is a direct child of . !
    def compat_xpath(xpath):
        """Return ``xpath``, ASCII-encoded when it is a text string."""
        return xpath.encode('ascii') if isinstance(xpath, compat_str) else xpath
295
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: parse_qs below follows the implementation in cpython 3's stdlib,
    # because Python 2's version is apparently totally broken.

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string ``qs`` into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. Raises ValueError for a field
        without '=' (or an empty field) when ``strict_parsing`` is true.
        Pairs with an empty value are dropped unless ``keep_blank_values``
        is true.
        """
        qs, _coerce_result = qs, compat_str
        fields = [part for chunk in qs.split('&') for part in chunk.split(';')]
        result = []
        for field in fields:
            if not field and not strict_parsing:
                continue
            parts = field.split('=', 1)
            if len(parts) != 2:
                if strict_parsing:
                    raise ValueError('bad query field: %r' % (field,))
                # A control name without an equal sign
                if not keep_blank_values:
                    continue
                parts.append('')
            if not (len(parts[1]) or keep_blank_values):
                continue
            name = compat_urllib_parse_unquote(
                parts[0].replace('+', ' '), encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = compat_urllib_parse_unquote(
                parts[1].replace('+', ' '), encoding=encoding, errors=errors)
            value = _coerce_result(value)
            result.append((name, value))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string ``qs`` into a dict mapping each name to the
        list of its values, in order of appearance."""
        grouped = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            grouped.setdefault(name, []).append(value)
        return grouped
342
try:
    from shlex import quote as compat_shlex_quote
except ImportError:  # Python < 3.3
    def compat_shlex_quote(s):
        """Return a shell-escaped version of the string ``s``.

        Strings made up only of word characters, '-', '_', '.' and '/' are
        returned unchanged; anything else (including the empty string) is
        wrapped in single quotes, with embedded single quotes escaped.
        """
        # \Z rather than $: '$' also matches just before a trailing
        # newline, which would let e.g. 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
351
352
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """Split ``s`` with shlex.split, UTF-8 encoding text input first."""
        encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(encoded, comments, posix)
else:
    compat_shlex_split = shlex.split
362
363
def compat_ord(c):
    """Return ``c`` itself when it is exactly an ``int``, else ``ord(c)``."""
    return c if type(c) is int else ord(c)
369
370
# When os.name reports 'java', os._name is used instead.
if os.name == 'java':
    compat_os_name = os._name
else:
    compat_os_name = os.name
372
373
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser

    def compat_setenv(key, value, env=os.environ):
        """Set ``env[key]`` to ``value`` (``env`` defaults to os.environ)."""
        env[key] = value
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable ``key`` decoded with the filesystem
        encoding, or ``default`` when it is unset.

        Falsy results (unset without default, empty value) are returned
        without decoding.
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    def compat_setenv(key, value, env=os.environ):
        """Set ``env[key]`` to ``value``, encoding text strings with the
        filesystem encoding first (``env`` defaults to os.environ)."""
        def encode(v):
            from .utils import get_filesystem_encoding
            return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
        env[encode(key)] = encode(value)

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if compat_os_name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i marks the end of the '~' or '~user' prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare '~': current user's home.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # '~user': look the user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif compat_os_name == 'nt' or compat_os_name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i marks the end of the '~' or '~user' prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() never raises KeyError; without a default
                # it returns None for an unset HOMEDRIVE, which would break
                # os.path.join. Fall back to an empty drive instead, as the
                # cpython implementation does.
                drive = compat_getenv('HOMEDRIVE', '')
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
457
458
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string ``s``."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print ``s`` encoded with the preferred encoding; characters it
        cannot represent become XML character references."""
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
467
468
if sys.platform == 'win32' and sys.version_info < (3, 0):
    def compat_getpass(prompt, *args, **kwargs):
        """Call getpass.getpass, encoding a text ``prompt`` with the
        preferred encoding first."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
477
# Python < 2.6.5 require kwargs to be bytes
def _testfunc(x):
    pass


try:
    # Probe: does this interpreter accept these keyword names via ** ?
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of ``kwargs`` with every key converted by bytes()."""
        converted = {}
        for key, value in kwargs.items():
            converted[bytes(key)] = value
        return converted
else:
    def compat_kwargs(kwargs):
        """Return ``kwargs`` unchanged."""
        return kwargs
488
489
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to ``address`` (a ``(host, port)`` pair) and return the socket.

        Each address returned by getaddrinfo is tried in turn. When all of
        them fail, the last socket.error is re-raised; when getaddrinfo
        returns nothing, a socket.error is raised.
        """
        host, port = address
        last_error = None
        for candidate in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            family, kind, proto, _canonname, sockaddr = candidate
            conn = None
            try:
                conn = socket.socket(family, kind, proto)
                conn.settimeout(timeout)
                if source_address:
                    conn.bind(source_address)
                conn.connect(sockaddr)
                return conn
            except socket.error as exc:
                last_error = exc
                # Do not leave a half-open socket behind.
                if conn is not None:
                    conn.close()
        if last_error is None:
            raise socket.error('getaddrinfo returns an empty list')
        raise last_error
514
515
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch ``optparse.OptionGroup.add_option`` if it rejects text strings.

    A throwaway option group is used as a probe. Only when adding an option
    to it raises TypeError is ``add_option`` replaced by a wrapper that
    ASCII-encodes text arguments before delegating to the original.
    """
    parser = optparse.OptionParser()
    group = optparse.OptionGroup(parser, 'foo')
    try:
        group.add_option('-t')
    except TypeError:
        original_add_option = optparse.OptionGroup.add_option

        def _to_ascii(value):
            if isinstance(value, compat_str):
                return value.encode('ascii', 'replace')
            return value

        def _compat_add_option(self, *args, **kwargs):
            encoded_args = [_to_ascii(arg) for arg in args]
            encoded_kwargs = dict(
                (name, _to_ascii(val)) for name, val in kwargs.items())
            return original_add_option(self, *encoded_args, **encoded_kwargs)
        optparse.OptionGroup.add_option = _compat_add_option
535
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        The COLUMNS and LINES environment variables are used when they hold
        positive integers. Any dimension still unknown is taken from the
        output of ``stty size``, or from ``fallback`` when that fails.
        """
        def _positive_env_int(name):
            # Unset, empty, non-numeric and non-positive values all mean
            # "unknown". Swallowing ValueError matches
            # shutil.get_terminal_size, which ignores malformed values
            # instead of raising.
            raw = compat_getenv(name)
            if not raw:
                return None
            try:
                number = int(raw)
            except ValueError:
                return None
            return number if number > 0 else None

        columns = _positive_env_int('COLUMNS')
        lines = _positive_env_int('LINES')

        if columns is None or lines is None:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<rows> <columns>".
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None:
                columns = _columns
            if lines is None:
                lines = _lines
        return _terminal_size(columns, lines)
568
try:
    # Probe: does itertools.count accept these keyword arguments?
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield ``start``, ``start + step``, ``start + 2 * step``, ...
        without end."""
        value = start
        while True:
            yield value
            value += step
else:
    compat_itertools_count = itertools.count
578
579 if sys.version_info >= (3, 0):
580 from tokenize import tokenize as compat_tokenize_tokenize
581 else:
582 from tokenize import generate_tokens as compat_tokenize_tokenize
583
584
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
    # See https://bugs.python.org/issue19099
    def compat_struct_pack(spec, *args):
        """struct.pack with a text ``spec`` ASCII-encoded first."""
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def compat_struct_unpack(spec, *args):
        """struct.unpack with a text ``spec`` ASCII-encoded first."""
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    compat_struct_pack = struct.pack
    compat_struct_unpack = struct.unpack
602
603
# Names exported by ``from ... import *``: the compat_* aliases and helpers
# defined in this module plus workaround_optparse_bug9161. Private helpers
# (leading underscore) are deliberately not listed.
__all__ = [
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_etree_fromstring',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_os_name',
    'compat_parse_qs',
    'compat_print',
    'compat_setenv',
    'compat_shlex_quote',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_struct_pack',
    'compat_struct_unpack',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlencode',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'compat_xpath',
    'workaround_optparse_bug9161',
]