youtube_dl/compat.py

   1 from __future__ import unicode_literals
   2
   3 import binascii
   4 import collections
   5 import email
   6 import getpass
   7 import io
   8 import optparse
   9 import os
  10 import re
  11 import shlex
  12 import shutil
  13 import socket
  14 import struct
  15 import subprocess
  16 import sys
  17 import itertools
  18 import xml.etree.ElementTree
  19
  20
  21 try:
  22     import urllib.request as compat_urllib_request
  23 except ImportError:  # Python 2
  24     import urllib2 as compat_urllib_request
  25
  26 try:
  27     import urllib.error as compat_urllib_error
  28 except ImportError:  # Python 2
  29     import urllib2 as compat_urllib_error
  30
  31 try:
  32     import urllib.parse as compat_urllib_parse
  33 except ImportError:  # Python 2
  34     import urllib as compat_urllib_parse
  35
  36 try:
  37     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  38 except ImportError:  # Python 2
  39     from urlparse import urlparse as compat_urllib_parse_urlparse
  40
  41 try:
  42     import urllib.parse as compat_urlparse
  43 except ImportError:  # Python 2
  44     import urlparse as compat_urlparse
  45
  46 try:
  47     import urllib.response as compat_urllib_response
  48 except ImportError:  # Python 2
  49     import urllib as compat_urllib_response
  50
  51 try:
  52     import http.cookiejar as compat_cookiejar
  53 except ImportError:  # Python 2
  54     import cookielib as compat_cookiejar
  55
  56 try:
  57     import http.cookies as compat_cookies
  58 except ImportError:  # Python 2
  59     import Cookie as compat_cookies
  60
  61 try:
  62     import html.entities as compat_html_entities
  63 except ImportError:  # Python 2
  64     import htmlentitydefs as compat_html_entities
  65
  66 try:
  67     import http.client as compat_http_client
  68 except ImportError:  # Python 2
  69     import httplib as compat_http_client
  70
  71 try:
  72     from urllib.error import HTTPError as compat_HTTPError
  73 except ImportError:  # Python 2
  74     from urllib2 import HTTPError as compat_HTTPError
  75
  76 try:
  77     from urllib.request import urlretrieve as compat_urlretrieve
  78 except ImportError:  # Python 2
  79     from urllib import urlretrieve as compat_urlretrieve
  80
  81 try:
  82     from html.parser import HTMLParser as compat_HTMLParser
  83 except ImportError:  # Python 2
  84     from HTMLParser import HTMLParser as compat_HTMLParser
  85
  86
  87 try:
  88     from subprocess import DEVNULL
  89     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  90 except ImportError:
  91     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  92
  93 try:
  94     import http.server as compat_http_server
  95 except ImportError:
  96     import BaseHTTPServer as compat_http_server
  97
  98 try:
  99     compat_str = unicode  # Python 2
 100 except NameError:
 101     compat_str = str
 102
 103 try:
 104     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
 105     from urllib.parse import unquote as compat_urllib_parse_unquote
 106     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
 107 except ImportError:  # Python 2
 108     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
 109                 else re.compile('([\x00-\x7f]+)'))
 110
 111     # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
 112     # implementations from cpython 3.4.3's stdlib. Python 2's version
 113     # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
 114
 115     def compat_urllib_parse_unquote_to_bytes(string):
 116         """unquote_to_bytes('abc%20def') -> b'abc def'."""
 117         # Note: strings are encoded as UTF-8. This is only an issue if it contains
 118         # unescaped non-ASCII characters, which URIs should not.
 119         if not string:
 120             # Is it a string-like object?
 121             string.split
 122             return b''
 123         if isinstance(string, compat_str):
 124             string = string.encode('utf-8')
 125         bits = string.split(b'%')
 126         if len(bits) == 1:
 127             return string
 128         res = [bits[0]]
 129         append = res.append
 130         for item in bits[1:]:
 131             try:
 132                 append(compat_urllib_parse._hextochr[item[:2]])
 133                 append(item[2:])
 134             except KeyError:
 135                 append(b'%')
 136                 append(item)
 137         return b''.join(res)
 138
 139     def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 140         """Replace %xx escapes by their single-character equivalent. The optional
 141         encoding and errors parameters specify how to decode percent-encoded
 142         sequences into Unicode characters, as accepted by the bytes.decode()
 143         method.
 144         By default, percent-encoded sequences are decoded with UTF-8, and invalid
 145         sequences are replaced by a placeholder character.
 146
 147         unquote('abc%20def') -> 'abc def'.
 148         """
 149         if '%' not in string:
 150             string.split
 151             return string
 152         if encoding is None:
 153             encoding = 'utf-8'
 154         if errors is None:
 155             errors = 'replace'
 156         bits = _asciire.split(string)
 157         res = [bits[0]]
 158         append = res.append
 159         for i in range(1, len(bits), 2):
 160             append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
 161             append(bits[i + 1])
 162         return ''.join(res)
 163
 164     def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
 165         """Like unquote(), but also replace plus signs by spaces, as required for
 166         unquoting HTML form values.
 167
 168         unquote_plus('%7e/abc+def') -> '~/abc def'
 169         """
 170         string = string.replace('+', ' ')
 171         return compat_urllib_parse_unquote(string, encoding, errors)
 172
 173 try:
 174     from urllib.parse import urlencode as compat_urllib_parse_urlencode
 175 except ImportError:  # Python 2
 176     # Python 2 will choke in urlencode on mixture of byte and unicode strings.
 177     # Possible solutions are to either port it from python 3 with all
 178     # the friends or manually ensure input query contains only byte strings.
 179     # We will stick with latter thus recursively encoding the whole query.
 180     def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
 181         def encode_elem(e):
 182             if isinstance(e, dict):
 183                 e = encode_dict(e)
 184             elif isinstance(e, (list, tuple,)):
 185                 list_e = encode_list(e)
 186                 e = tuple(list_e) if isinstance(e, tuple) else list_e
 187             elif isinstance(e, compat_str):
 188                 e = e.encode(encoding)
 189             return e
 190
 191         def encode_dict(d):
 192             return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
 193
 194         def encode_list(l):
 195             return [encode_elem(e) for e in l]
 196
 197         return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
 198
 199 try:
 200     from urllib.request import DataHandler as compat_urllib_request_DataHandler
 201 except ImportError:  # Python < 3.4
 202     # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
 203     class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
 204         def data_open(self, req):
 205             # data URLs as specified in RFC 2397.
 206             #
 207             # ignores POSTed data
 208             #
 209             # syntax:
 210             # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 211             # mediatype := [ type "/" subtype ] *( ";" parameter )
 212             # data      := *urlchar
 213             # parameter := attribute "=" value
 214             url = req.get_full_url()
 215
 216             scheme, data = url.split(':', 1)
 217             mediatype, data = data.split(',', 1)
 218
 219             # even base64 encoded data URLs might be quoted so unquote in any case:
 220             data = compat_urllib_parse_unquote_to_bytes(data)
 221             if mediatype.endswith(';base64'):
 222                 data = binascii.a2b_base64(data)
 223                 mediatype = mediatype[:-7]
 224
 225             if not mediatype:
 226                 mediatype = 'text/plain;charset=US-ASCII'
 227
 228             headers = email.message_from_string(
 229                 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
 230
 231             return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
 232
 233 try:
 234     compat_basestring = basestring  # Python 2
 235 except NameError:
 236     compat_basestring = str
 237
 238 try:
 239     compat_chr = unichr  # Python 2
 240 except NameError:
 241     compat_chr = chr
 242
 243 try:
 244     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 245 except ImportError:  # Python 2.6
 246     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 247
 248 if sys.version_info[0] >= 3:
 249     compat_etree_fromstring = xml.etree.ElementTree.fromstring
 250 else:
 251     # python 2.x tries to encode unicode strings with ascii (see the
 252     # XMLParser._fixtext method)
 253     etree = xml.etree.ElementTree
 254
 255     try:
 256         _etree_iter = etree.Element.iter
 257     except AttributeError:  # Python <=2.6
 258         def _etree_iter(root):
 259             for el in root.findall('*'):
 260                 yield el
 261                 for sub in _etree_iter(el):
 262                     yield sub
 263
 264     # on 2.6 XML doesn't have a parser argument, function copied from CPython
 265     # 2.7 source
 266     def _XML(text, parser=None):
 267         if not parser:
 268             parser = etree.XMLParser(target=etree.TreeBuilder())
 269         parser.feed(text)
 270         return parser.close()
 271
 272     def _element_factory(*args, **kwargs):
 273         el = etree.Element(*args, **kwargs)
 274         for k, v in el.items():
 275             if isinstance(v, bytes):
 276                 el.set(k, v.decode('utf-8'))
 277         return el
 278
 279     def compat_etree_fromstring(text):
 280         doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
 281         for el in _etree_iter(doc):
 282             if el.text is not None and isinstance(el.text, bytes):
 283                 el.text = el.text.decode('utf-8')
 284         return doc
 285
 286 if sys.version_info < (2, 7):
 287     # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 288     # .//node does not match if a node is a direct child of . !
 289     def compat_xpath(xpath):
 290         if isinstance(xpath, compat_str):
 291             xpath = xpath.encode('ascii')
 292         return xpath
 293 else:
 294     compat_xpath = lambda xpath: xpath
 295
 296 try:
 297     from urllib.parse import parse_qs as compat_parse_qs
 298 except ImportError:  # Python 2
 299     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
 300     # Python 2's version is apparently totally broken
 301
 302     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 303                    encoding='utf-8', errors='replace'):
 304         qs, _coerce_result = qs, compat_str
 305         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 306         r = []
 307         for name_value in pairs:
 308             if not name_value and not strict_parsing:
 309                 continue
 310             nv = name_value.split('=', 1)
 311             if len(nv) != 2:
 312                 if strict_parsing:
 313                     raise ValueError('bad query field: %r' % (name_value,))
 314                 # Handle case of a control-name with no equal sign
 315                 if keep_blank_values:
 316                     nv.append('')
 317                 else:
 318                     continue
 319             if len(nv[1]) or keep_blank_values:
 320                 name = nv[0].replace('+', ' ')
 321                 name = compat_urllib_parse_unquote(
 322                     name, encoding=encoding, errors=errors)
 323                 name = _coerce_result(name)
 324                 value = nv[1].replace('+', ' ')
 325                 value = compat_urllib_parse_unquote(
 326                     value, encoding=encoding, errors=errors)
 327                 value = _coerce_result(value)
 328                 r.append((name, value))
 329         return r
 330
 331     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 332                         encoding='utf-8', errors='replace'):
 333         parsed_result = {}
 334         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 335                            encoding=encoding, errors=errors)
 336         for name, value in pairs:
 337             if name in parsed_result:
 338                 parsed_result[name].append(value)
 339             else:
 340                 parsed_result[name] = [value]
 341         return parsed_result
 342
 343 try:
 344     from shlex import quote as compat_shlex_quote
 345 except ImportError:  # Python < 3.3
 346     def compat_shlex_quote(s):
 347         if re.match(r'^[-_\w./]+$', s):
 348             return s
 349         else:
 350             return "'" + s.replace("'", "'\"'\"'") + "'"
 351
 352
 353 if sys.version_info >= (2, 7, 3):
 354     compat_shlex_split = shlex.split
 355 else:
 356     # Working around shlex issue with unicode strings on some python 2
 357     # versions (see http://bugs.python.org/issue1548891)
 358     def compat_shlex_split(s, comments=False, posix=True):
 359         if isinstance(s, compat_str):
 360             s = s.encode('utf-8')
 361         return shlex.split(s, comments, posix)
 362
 363
 364 def compat_ord(c):
 365     if type(c) is int:
 366         return c
 367     else:
 368         return ord(c)
 369
 370
 371 compat_os_name = os._name if os.name == 'java' else os.name
 372
 373
 374 if sys.version_info >= (3, 0):
 375     compat_getenv = os.getenv
 376     compat_expanduser = os.path.expanduser
 377
 378     def compat_setenv(key, value, env=os.environ):
 379         env[key] = value
 380 else:
 381     # Environment variables should be decoded with filesystem encoding.
 382     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
 383
 384     def compat_getenv(key, default=None):
 385         from .utils import get_filesystem_encoding
 386         env = os.getenv(key, default)
 387         if env:
 388             env = env.decode(get_filesystem_encoding())
 389         return env
 390
 391     def compat_setenv(key, value, env=os.environ):
 392         def encode(v):
 393             from .utils import get_filesystem_encoding
 394             return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
 395         env[encode(key)] = encode(value)
 396
 397     # HACK: The default implementations of os.path.expanduser from cpython do not decode
 398     # environment variables with filesystem encoding. We will work around this by
 399     # providing adjusted implementations.
 400     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
 401     # for different platforms with correct environment variables decoding.
 402
 403     if compat_os_name == 'posix':
 404         def compat_expanduser(path):
 405             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
 406             do nothing."""
 407             if not path.startswith('~'):
 408                 return path
 409             i = path.find('/', 1)
 410             if i < 0:
 411                 i = len(path)
 412             if i == 1:
 413                 if 'HOME' not in os.environ:
 414                     import pwd
 415                     userhome = pwd.getpwuid(os.getuid()).pw_dir
 416                 else:
 417                     userhome = compat_getenv('HOME')
 418             else:
 419                 import pwd
 420                 try:
 421                     pwent = pwd.getpwnam(path[1:i])
 422                 except KeyError:
 423                     return path
 424                 userhome = pwent.pw_dir
 425             userhome = userhome.rstrip('/')
 426             return (userhome + path[i:]) or '/'
 427     elif compat_os_name == 'nt' or compat_os_name == 'ce':
 428         def compat_expanduser(path):
 429             """Expand ~ and ~user constructs.
 430
 431             If user or $HOME is unknown, do nothing."""
 432             if path[:1] != '~':
 433                 return path
 434             i, n = 1, len(path)
 435             while i < n and path[i] not in '/\\':
 436                 i = i + 1
 437
 438             if 'HOME' in os.environ:
 439                 userhome = compat_getenv('HOME')
 440             elif 'USERPROFILE' in os.environ:
 441                 userhome = compat_getenv('USERPROFILE')
 442             elif 'HOMEPATH' not in os.environ:
 443                 return path
 444             else:
 445                 try:
 446                     drive = compat_getenv('HOMEDRIVE')
 447                 except KeyError:
 448                     drive = ''
 449                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 450
 451             if i != 1:  # ~user
 452                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 453
 454             return userhome + path[i:]
 455     else:
 456         compat_expanduser = os.path.expanduser
 457
 458
 459 if sys.version_info < (3, 0):
 460     def compat_print(s):
 461         from .utils import preferredencoding
 462         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 463 else:
 464     def compat_print(s):
 465         assert isinstance(s, compat_str)
 466         print(s)
 467
 468
 469 if sys.version_info < (3, 0) and sys.platform == 'win32':
 470     def compat_getpass(prompt, *args, **kwargs):
 471         if isinstance(prompt, compat_str):
 472             from .utils import preferredencoding
 473             prompt = prompt.encode(preferredencoding())
 474         return getpass.getpass(prompt, *args, **kwargs)
 475 else:
 476     compat_getpass = getpass.getpass
 477
 478 # Python < 2.6.5 require kwargs to be bytes
 479 try:
 480     def _testfunc(x):
 481         pass
 482     _testfunc(**{'x': 0})
 483 except TypeError:
 484     def compat_kwargs(kwargs):
 485         return dict((bytes(k), v) for k, v in kwargs.items())
 486 else:
 487     compat_kwargs = lambda kwargs: kwargs
 488
 489
 490 if sys.version_info < (2, 7):
 491     def compat_socket_create_connection(address, timeout, source_address=None):
 492         host, port = address
 493         err = None
 494         for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 495             af, socktype, proto, canonname, sa = res
 496             sock = None
 497             try:
 498                 sock = socket.socket(af, socktype, proto)
 499                 sock.settimeout(timeout)
 500                 if source_address:
 501                     sock.bind(source_address)
 502                 sock.connect(sa)
 503                 return sock
 504             except socket.error as _:
 505                 err = _
 506                 if sock is not None:
 507                     sock.close()
 508         if err is not None:
 509             raise err
 510         else:
 511             raise socket.error('getaddrinfo returns an empty list')
 512 else:
 513     compat_socket_create_connection = socket.create_connection
 514
 515
 516 # Fix https://github.com/rg3/youtube-dl/issues/4223
 517 # See http://bugs.python.org/issue9161 for what is broken
 518 def workaround_optparse_bug9161():
 519     op = optparse.OptionParser()
 520     og = optparse.OptionGroup(op, 'foo')
 521     try:
 522         og.add_option('-t')
 523     except TypeError:
 524         real_add_option = optparse.OptionGroup.add_option
 525
 526         def _compat_add_option(self, *args, **kwargs):
 527             enc = lambda v: (
 528                 v.encode('ascii', 'replace') if isinstance(v, compat_str)
 529                 else v)
 530             bargs = [enc(a) for a in args]
 531             bkwargs = dict(
 532                 (k, enc(v)) for k, v in kwargs.items())
 533             return real_add_option(self, *bargs, **bkwargs)
 534         optparse.OptionGroup.add_option = _compat_add_option
 535
 536 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
 537     compat_get_terminal_size = shutil.get_terminal_size
 538 else:
 539     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 540
 541     def compat_get_terminal_size(fallback=(80, 24)):
 542         columns = compat_getenv('COLUMNS')
 543         if columns:
 544             columns = int(columns)
 545         else:
 546             columns = None
 547         lines = compat_getenv('LINES')
 548         if lines:
 549             lines = int(lines)
 550         else:
 551             lines = None
 552
 553         if columns is None or lines is None or columns <= 0 or lines <= 0:
 554             try:
 555                 sp = subprocess.Popen(
 556                     ['stty', 'size'],
 557                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 558                 out, err = sp.communicate()
 559                 _lines, _columns = map(int, out.split())
 560             except Exception:
 561                 _columns, _lines = _terminal_size(*fallback)
 562
 563             if columns is None or columns <= 0:
 564                 columns = _columns
 565             if lines is None or lines <= 0:
 566                 lines = _lines
 567         return _terminal_size(columns, lines)
 568
 569 try:
 570     itertools.count(start=0, step=1)
 571     compat_itertools_count = itertools.count
 572 except TypeError:  # Python 2.6
 573     def compat_itertools_count(start=0, step=1):
 574         n = start
 575         while True:
 576             yield n
 577             n += step
 578
 579 if sys.version_info >= (3, 0):
 580     from tokenize import tokenize as compat_tokenize_tokenize
 581 else:
 582     from tokenize import generate_tokens as compat_tokenize_tokenize
 583
 584
 585 try:
 586     struct.pack('!I', 0)
 587 except TypeError:
 588     # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
 589     # See https://bugs.python.org/issue19099
 590     def compat_struct_pack(spec, *args):
 591         if isinstance(spec, compat_str):
 592             spec = spec.encode('ascii')
 593         return struct.pack(spec, *args)
 594
 595     def compat_struct_unpack(spec, *args):
 596         if isinstance(spec, compat_str):
 597             spec = spec.encode('ascii')
 598         return struct.unpack(spec, *args)
 599 else:
 600     compat_struct_pack = struct.pack
 601     compat_struct_unpack = struct.unpack
 602
 603
# Names exported by `from ... import *`: the compat_* shims defined in this
# module plus workaround_optparse_bug9161. Module-private helpers (leading
# underscore) are not listed.
__all__ = [
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_etree_fromstring',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_os_name',
    'compat_parse_qs',
    'compat_print',
    'compat_setenv',
    'compat_shlex_quote',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_struct_pack',
    'compat_struct_unpack',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlencode',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'compat_xpath',
    'workaround_optparse_bug9161',
]