youtube_dl/compat.py

   1 from __future__ import unicode_literals
   2
   3 import collections
   4 import getpass
   5 import optparse
   6 import os
   7 import re
   8 import shlex
   9 import shutil
  10 import socket
  11 import subprocess
  12 import sys
  13 import itertools
  14
  15
  16 try:
  17     import urllib.request as compat_urllib_request
  18 except ImportError:  # Python 2
  19     import urllib2 as compat_urllib_request
  20
  21 try:
  22     import urllib.error as compat_urllib_error
  23 except ImportError:  # Python 2
  24     import urllib2 as compat_urllib_error
  25
  26 try:
  27     import urllib.parse as compat_urllib_parse
  28 except ImportError:  # Python 2
  29     import urllib as compat_urllib_parse
  30
  31 try:
  32     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  33 except ImportError:  # Python 2
  34     from urlparse import urlparse as compat_urllib_parse_urlparse
  35
  36 try:
  37     import urllib.parse as compat_urlparse
  38 except ImportError:  # Python 2
  39     import urlparse as compat_urlparse
  40
  41 try:
  42     import http.cookiejar as compat_cookiejar
  43 except ImportError:  # Python 2
  44     import cookielib as compat_cookiejar
  45
  46 try:
  47     import http.cookies as compat_cookies
  48 except ImportError:  # Python 2
  49     import Cookie as compat_cookies
  50
  51 try:
  52     import html.entities as compat_html_entities
  53 except ImportError:  # Python 2
  54     import htmlentitydefs as compat_html_entities
  55
  56 try:
  57     import http.client as compat_http_client
  58 except ImportError:  # Python 2
  59     import httplib as compat_http_client
  60
  61 try:
  62     from urllib.error import HTTPError as compat_HTTPError
  63 except ImportError:  # Python 2
  64     from urllib2 import HTTPError as compat_HTTPError
  65
  66 try:
  67     from urllib.request import urlretrieve as compat_urlretrieve
  68 except ImportError:  # Python 2
  69     from urllib import urlretrieve as compat_urlretrieve
  70
  71
  72 try:
  73     from subprocess import DEVNULL
  74     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  75 except ImportError:
  76     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  77
  78 try:
  79     import http.server as compat_http_server
  80 except ImportError:
  81     import BaseHTTPServer as compat_http_server
  82
  83 try:
  84     compat_str = unicode  # Python 2
  85 except NameError:
  86     compat_str = str
  87
  88 try:
  89     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
  90     from urllib.parse import unquote as compat_urllib_parse_unquote
  91     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
  92 except ImportError:  # Python 2
  93     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
  94                 else re.compile('([\x00-\x7f]+)'))
  95
  96     # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
  97     # implementations from cpython 3.4.3's stdlib. Python 2's version
  98     # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
  99
 100     def compat_urllib_parse_unquote_to_bytes(string):
 101         """unquote_to_bytes('abc%20def') -> b'abc def'."""
 102         # Note: strings are encoded as UTF-8. This is only an issue if it contains
 103         # unescaped non-ASCII characters, which URIs should not.
 104         if not string:
 105             # Is it a string-like object?
 106             string.split
 107             return b''
 108         if isinstance(string, compat_str):
 109             string = string.encode('utf-8')
 110         bits = string.split(b'%')
 111         if len(bits) == 1:
 112             return string
 113         res = [bits[0]]
 114         append = res.append
 115         for item in bits[1:]:
 116             try:
 117                 append(compat_urllib_parse._hextochr[item[:2]])
 118                 append(item[2:])
 119             except KeyError:
 120                 append(b'%')
 121                 append(item)
 122         return b''.join(res)
 123
 124     def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 125         """Replace %xx escapes by their single-character equivalent. The optional
 126         encoding and errors parameters specify how to decode percent-encoded
 127         sequences into Unicode characters, as accepted by the bytes.decode()
 128         method.
 129         By default, percent-encoded sequences are decoded with UTF-8, and invalid
 130         sequences are replaced by a placeholder character.
 131
 132         unquote('abc%20def') -> 'abc def'.
 133         """
 134         if '%' not in string:
 135             string.split
 136             return string
 137         if encoding is None:
 138             encoding = 'utf-8'
 139         if errors is None:
 140             errors = 'replace'
 141         bits = _asciire.split(string)
 142         res = [bits[0]]
 143         append = res.append
 144         for i in range(1, len(bits), 2):
 145             append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
 146             append(bits[i + 1])
 147         return ''.join(res)
 148
 149     def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
 150         """Like unquote(), but also replace plus signs by spaces, as required for
 151         unquoting HTML form values.
 152
 153         unquote_plus('%7e/abc+def') -> '~/abc def'
 154         """
 155         string = string.replace('+', ' ')
 156         return compat_urllib_parse_unquote(string, encoding, errors)
 157
 158 try:
 159     compat_basestring = basestring  # Python 2
 160 except NameError:
 161     compat_basestring = str
 162
 163 try:
 164     compat_chr = unichr  # Python 2
 165 except NameError:
 166     compat_chr = chr
 167
 168 try:
 169     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 170 except ImportError:  # Python 2.6
 171     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 172
 173
 174 try:
 175     from urllib.parse import parse_qs as compat_parse_qs
 176 except ImportError:  # Python 2
 177     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
 178     # Python 2's version is apparently totally broken
 179
 180     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 181                    encoding='utf-8', errors='replace'):
 182         qs, _coerce_result = qs, compat_str
 183         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 184         r = []
 185         for name_value in pairs:
 186             if not name_value and not strict_parsing:
 187                 continue
 188             nv = name_value.split('=', 1)
 189             if len(nv) != 2:
 190                 if strict_parsing:
 191                     raise ValueError("bad query field: %r" % (name_value,))
 192                 # Handle case of a control-name with no equal sign
 193                 if keep_blank_values:
 194                     nv.append('')
 195                 else:
 196                     continue
 197             if len(nv[1]) or keep_blank_values:
 198                 name = nv[0].replace('+', ' ')
 199                 name = compat_urllib_parse_unquote(
 200                     name, encoding=encoding, errors=errors)
 201                 name = _coerce_result(name)
 202                 value = nv[1].replace('+', ' ')
 203                 value = compat_urllib_parse_unquote(
 204                     value, encoding=encoding, errors=errors)
 205                 value = _coerce_result(value)
 206                 r.append((name, value))
 207         return r
 208
 209     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 210                         encoding='utf-8', errors='replace'):
 211         parsed_result = {}
 212         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 213                            encoding=encoding, errors=errors)
 214         for name, value in pairs:
 215             if name in parsed_result:
 216                 parsed_result[name].append(value)
 217             else:
 218                 parsed_result[name] = [value]
 219         return parsed_result
 220
 221 try:
 222     from shlex import quote as shlex_quote
 223 except ImportError:  # Python < 3.3
 224     def shlex_quote(s):
 225         if re.match(r'^[-_\w./]+$', s):
 226             return s
 227         else:
 228             return "'" + s.replace("'", "'\"'\"'") + "'"
 229
 230
 231 if sys.version_info >= (2, 7, 3):
 232     compat_shlex_split = shlex.split
 233 else:
 234     # Working around shlex issue with unicode strings on some python 2
 235     # versions (see http://bugs.python.org/issue1548891)
 236     def compat_shlex_split(s, comments=False, posix=True):
 237         if isinstance(s, compat_str):
 238             s = s.encode('utf-8')
 239         return shlex.split(s, comments, posix)
 240
 241
 242 def compat_ord(c):
 243     if type(c) is int:
 244         return c
 245     else:
 246         return ord(c)
 247
 248
 249 if sys.version_info >= (3, 0):
 250     compat_getenv = os.getenv
 251     compat_expanduser = os.path.expanduser
 252 else:
 253     # Environment variables should be decoded with filesystem encoding.
 254     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
 255
 256     def compat_getenv(key, default=None):
 257         from .utils import get_filesystem_encoding
 258         env = os.getenv(key, default)
 259         if env:
 260             env = env.decode(get_filesystem_encoding())
 261         return env
 262
 263     # HACK: The default implementations of os.path.expanduser from cpython do not decode
 264     # environment variables with filesystem encoding. We will work around this by
 265     # providing adjusted implementations.
 266     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
 267     # for different platforms with correct environment variables decoding.
 268
 269     if os.name == 'posix':
 270         def compat_expanduser(path):
 271             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
 272             do nothing."""
 273             if not path.startswith('~'):
 274                 return path
 275             i = path.find('/', 1)
 276             if i < 0:
 277                 i = len(path)
 278             if i == 1:
 279                 if 'HOME' not in os.environ:
 280                     import pwd
 281                     userhome = pwd.getpwuid(os.getuid()).pw_dir
 282                 else:
 283                     userhome = compat_getenv('HOME')
 284             else:
 285                 import pwd
 286                 try:
 287                     pwent = pwd.getpwnam(path[1:i])
 288                 except KeyError:
 289                     return path
 290                 userhome = pwent.pw_dir
 291             userhome = userhome.rstrip('/')
 292             return (userhome + path[i:]) or '/'
 293     elif os.name == 'nt' or os.name == 'ce':
 294         def compat_expanduser(path):
 295             """Expand ~ and ~user constructs.
 296
 297             If user or $HOME is unknown, do nothing."""
 298             if path[:1] != '~':
 299                 return path
 300             i, n = 1, len(path)
 301             while i < n and path[i] not in '/\\':
 302                 i = i + 1
 303
 304             if 'HOME' in os.environ:
 305                 userhome = compat_getenv('HOME')
 306             elif 'USERPROFILE' in os.environ:
 307                 userhome = compat_getenv('USERPROFILE')
 308             elif 'HOMEPATH' not in os.environ:
 309                 return path
 310             else:
 311                 try:
 312                     drive = compat_getenv('HOMEDRIVE')
 313                 except KeyError:
 314                     drive = ''
 315                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 316
 317             if i != 1:  # ~user
 318                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 319
 320             return userhome + path[i:]
 321     else:
 322         compat_expanduser = os.path.expanduser
 323
 324
 325 if sys.version_info < (3, 0):
 326     def compat_print(s):
 327         from .utils import preferredencoding
 328         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 329 else:
 330     def compat_print(s):
 331         assert isinstance(s, compat_str)
 332         print(s)
 333
 334
 335 try:
 336     subprocess_check_output = subprocess.check_output
 337 except AttributeError:
 338     def subprocess_check_output(*args, **kwargs):
 339         assert 'input' not in kwargs
 340         p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
 341         output, _ = p.communicate()
 342         ret = p.poll()
 343         if ret:
 344             raise subprocess.CalledProcessError(ret, p.args, output=output)
 345         return output
 346
 347 if sys.version_info < (3, 0) and sys.platform == 'win32':
 348     def compat_getpass(prompt, *args, **kwargs):
 349         if isinstance(prompt, compat_str):
 350             from .utils import preferredencoding
 351             prompt = prompt.encode(preferredencoding())
 352         return getpass.getpass(prompt, *args, **kwargs)
 353 else:
 354     compat_getpass = getpass.getpass
 355
 356 # Old 2.6 and 2.7 releases require kwargs to be bytes
 357 try:
 358     def _testfunc(x):
 359         pass
 360     _testfunc(**{'x': 0})
 361 except TypeError:
 362     def compat_kwargs(kwargs):
 363         return dict((bytes(k), v) for k, v in kwargs.items())
 364 else:
 365     compat_kwargs = lambda kwargs: kwargs
 366
 367
 368 if sys.version_info < (2, 7):
 369     def compat_socket_create_connection(address, timeout, source_address=None):
 370         host, port = address
 371         err = None
 372         for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 373             af, socktype, proto, canonname, sa = res
 374             sock = None
 375             try:
 376                 sock = socket.socket(af, socktype, proto)
 377                 sock.settimeout(timeout)
 378                 if source_address:
 379                     sock.bind(source_address)
 380                 sock.connect(sa)
 381                 return sock
 382             except socket.error as _:
 383                 err = _
 384                 if sock is not None:
 385                     sock.close()
 386         if err is not None:
 387             raise err
 388         else:
 389             raise socket.error("getaddrinfo returns an empty list")
 390 else:
 391     compat_socket_create_connection = socket.create_connection
 392
 393
 394 # Fix https://github.com/rg3/youtube-dl/issues/4223
 395 # See http://bugs.python.org/issue9161 for what is broken
 396 def workaround_optparse_bug9161():
 397     op = optparse.OptionParser()
 398     og = optparse.OptionGroup(op, 'foo')
 399     try:
 400         og.add_option('-t')
 401     except TypeError:
 402         real_add_option = optparse.OptionGroup.add_option
 403
 404         def _compat_add_option(self, *args, **kwargs):
 405             enc = lambda v: (
 406                 v.encode('ascii', 'replace') if isinstance(v, compat_str)
 407                 else v)
 408             bargs = [enc(a) for a in args]
 409             bkwargs = dict(
 410                 (k, enc(v)) for k, v in kwargs.items())
 411             return real_add_option(self, *bargs, **bkwargs)
 412         optparse.OptionGroup.add_option = _compat_add_option
 413
 414 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
 415     compat_get_terminal_size = shutil.get_terminal_size
 416 else:
 417     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 418
 419     def compat_get_terminal_size(fallback=(80, 24)):
 420         columns = compat_getenv('COLUMNS', None)
 421         if columns:
 422             columns = int(columns)
 423         else:
 424             columns = None
 425         lines = compat_getenv('LINES', None)
 426         if lines:
 427             lines = int(lines)
 428         else:
 429             lines = None
 430
 431         if columns <= 0 or lines <= 0:
 432             try:
 433                 sp = subprocess.Popen(
 434                     ['stty', 'size'],
 435                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 436                 out, err = sp.communicate()
 437                 _columns, _lines = map(int, out.split())
 438             except Exception:
 439                 _columns, _lines = _terminal_size(*fallback)
 440
 441             if columns <= 0:
 442                 columns = _columns
 443             if lines <= 0:
 444                 lines = _lines
 445         return _terminal_size(columns, lines)
 446
 447 try:
 448     itertools.count(start=0, step=1)
 449     compat_itertools_count = itertools.count
 450 except TypeError:  # Python 2.6
 451     def compat_itertools_count(start=0, step=1):
 452         n = start
 453         while True:
 454             yield n
 455             n += step
 456
 457 if sys.version_info >= (3, 0):
 458     from tokenize import tokenize as compat_tokenize_tokenize
 459 else:
 460     from tokenize import generate_tokens as compat_tokenize_tokenize
 461
# Public names exported by ``from <this module> import *``. Private helpers
# (leading underscore) are deliberately left out.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]