]> jfr.im git - yt-dlp.git/blame - youtube_dl/compat.py
Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)
[yt-dlp.git] / youtube_dl / compat.py
CommitLineData
451948b2
PH
1from __future__ import unicode_literals
2
0a67a363 3import binascii
003c69a8 4import collections
0a67a363 5import email
8c25f81b 6import getpass
0a67a363 7import io
e07e9313 8import optparse
8c25f81b 9import os
7d4111ed 10import re
51f579b6 11import shlex
003c69a8 12import shutil
be4a824d 13import socket
8c25f81b
PH
14import subprocess
15import sys
a0e060ac 16import itertools
36e6f62c 17import xml.etree.ElementTree
8c25f81b
PH
18
19
20try:
21 import urllib.request as compat_urllib_request
5f6a1245 22except ImportError: # Python 2
8c25f81b
PH
23 import urllib2 as compat_urllib_request
24
25try:
26 import urllib.error as compat_urllib_error
5f6a1245 27except ImportError: # Python 2
8c25f81b
PH
28 import urllib2 as compat_urllib_error
29
30try:
31 import urllib.parse as compat_urllib_parse
5f6a1245 32except ImportError: # Python 2
8c25f81b
PH
33 import urllib as compat_urllib_parse
34
35try:
36 from urllib.parse import urlparse as compat_urllib_parse_urlparse
5f6a1245 37except ImportError: # Python 2
8c25f81b
PH
38 from urlparse import urlparse as compat_urllib_parse_urlparse
39
40try:
41 import urllib.parse as compat_urlparse
5f6a1245 42except ImportError: # Python 2
8c25f81b
PH
43 import urlparse as compat_urlparse
44
0a67a363
YCH
45try:
46 import urllib.response as compat_urllib_response
47except ImportError: # Python 2
48 import urllib as compat_urllib_response
49
8c25f81b
PH
50try:
51 import http.cookiejar as compat_cookiejar
5f6a1245 52except ImportError: # Python 2
8c25f81b
PH
53 import cookielib as compat_cookiejar
54
799207e8 55try:
56 import http.cookies as compat_cookies
57except ImportError: # Python 2
58 import Cookie as compat_cookies
59
8c25f81b
PH
60try:
61 import html.entities as compat_html_entities
5f6a1245 62except ImportError: # Python 2
8c25f81b
PH
63 import htmlentitydefs as compat_html_entities
64
8c25f81b
PH
65try:
66 import http.client as compat_http_client
5f6a1245 67except ImportError: # Python 2
8c25f81b
PH
68 import httplib as compat_http_client
69
70try:
71 from urllib.error import HTTPError as compat_HTTPError
72except ImportError: # Python 2
73 from urllib2 import HTTPError as compat_HTTPError
74
75try:
76 from urllib.request import urlretrieve as compat_urlretrieve
77except ImportError: # Python 2
78 from urllib import urlretrieve as compat_urlretrieve
79
80
try:
    from subprocess import DEVNULL
except ImportError:  # subprocess.DEVNULL is unavailable
    def compat_subprocess_get_DEVNULL():
        """Return a freshly opened, writable handle on the null device."""
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the subprocess.DEVNULL sentinel."""
        return DEVNULL
86
83fda3c0
PH
87try:
88 import http.server as compat_http_server
89except ImportError:
90 import BaseHTTPServer as compat_http_server
91
953fed28
PH
92try:
93 compat_str = unicode # Python 2
94except NameError:
95 compat_str = str
96
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    if hasattr(compat_urllib_parse, '_asciire'):
        _asciire = compat_urllib_parse._asciire
    else:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Text input is encoded as UTF-8 first; that only matters when it
        # carries unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Attribute access only: fails loudly for non string-like input
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        segments = string.split(b'%')
        if len(segments) == 1:
            return string
        hex_table = compat_urllib_parse._hextochr
        pieces = [segments[0]]
        for segment in segments[1:]:
            try:
                decoded = hex_table[segment[:2]]
            except KeyError:
                # Not a valid %xx escape: keep the percent sign verbatim
                pieces.extend((b'%', segment))
            else:
                pieces.extend((decoded, segment[2:]))
        return b''.join(pieces)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by the characters they stand for.

        encoding and errors are handed to bytes.decode() when turning the
        percent-encoded sequences into text; None selects the defaults
        ('utf-8' and 'replace').

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # The split alternates: non-ASCII text, ASCII run, non-ASCII text, ...
        parts = _asciire.split(string)
        out = [parts[0]]
        for ascii_run, other in zip(parts[1::2], parts[2::2]):
            out.append(compat_urllib_parse_unquote_to_bytes(ascii_run).decode(encoding, errors))
            out.append(other)
        return ''.join(out)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but plus signs become spaces first, as needed for
        HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(string.replace('+', ' '), encoding, errors)
166
0a67a363
YCH
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """Opener handler that serves RFC 2397 ``data:`` URLs."""

        def data_open(self, req):
            """Decode the data URL carried by *req* into a response object.

            POSTed data is ignored.  Grammar being handled:

                dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
                mediatype := [ type "/" subtype ] *( ";" parameter )
                data      := *urlchar
                parameter := attribute "=" value
            """
            full_url = req.get_full_url()

            _scheme, remainder = full_url.split(":", 1)
            media_type, payload = remainder.split(",", 1)

            # Even base64 payloads may be percent-quoted, so always unquote
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            if media_type.endswith(";base64"):
                payload = binascii.a2b_base64(payload)
                media_type = media_type[:-7]

            media_type = media_type or "text/plain;charset=US-ASCII"

            header_text = "Content-type: %s\nContent-length: %d\n" % (media_type, len(payload))
            return compat_urllib_response.addinfourl(
                io.BytesIO(payload), email.message_from_string(header_text), full_url)
200
8f9312c3 201try:
0196149c 202 compat_basestring = basestring # Python 2
8f9312c3 203except NameError:
0196149c 204 compat_basestring = str
8f9312c3
PH
205
206try:
207 compat_chr = unichr # Python 2
208except NameError:
209 compat_chr = chr
210
211try:
212 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
213except ImportError: # Python 2.6
214 from xml.parsers.expat import ExpatError as compat_xml_parse_error
215
36e6f62c
JMF
if sys.version_info[0] >= 3:
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # On Python 2.x the attributes of a node are str objects instead of
    # unicode, so the tree is built with an element factory that decodes them.
    etree = xml.etree.ElementTree

    # On 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Parse *text* with *parser* (a default parser when omitted) and
        return the root element."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element whose byte-string attribute values are decoded
        from UTF-8 to unicode."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            # Only byte strings need decoding.  Calling .decode() on a value
            # that is already unicode makes Python 2 encode it as ASCII
            # first, which raises UnicodeEncodeError for non-ASCII text.
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse the XML document in *text* and return its root element,
        with attribute values as unicode strings."""
        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
8c25f81b
PH
239
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string *qs* into a list of (name, value) pairs.

        Fields are separated by '&' or ';'.  Fields with an empty value are
        dropped unless keep_blank_values is true; with strict_parsing a field
        lacking '=' raises ValueError instead of being skipped.
        """
        def _decode(component):
            # '+' stands for a space in form-encoded data
            text = compat_urllib_parse_unquote(
                component.replace('+', ' '), encoding=encoding, errors=errors)
            return compat_str(text)

        result = []
        for amp_chunk in qs.split('&'):
            for field in amp_chunk.split(';'):
                if not (field or strict_parsing):
                    continue
                raw_name, sep, raw_value = field.partition('=')
                if not sep:
                    if strict_parsing:
                        raise ValueError("bad query field: %r" % (field,))
                    # A control name without an equal sign counts as blank
                    if not keep_blank_values:
                        continue
                if raw_value or keep_blank_values:
                    name = _decode(raw_name)
                    value = _decode(raw_value)
                    result.append((name, value))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to the list
        of its values, in order of appearance."""
        grouped = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            grouped.setdefault(name, []).append(value)
        return grouped
286
8c25f81b
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return *s* quoted so a POSIX shell reads it as a single token.

        Strings made only of word characters and ``-_./`` are returned
        unchanged; anything else is wrapped in single quotes, with embedded
        single quotes spliced in as ``'"'"'``.
        """
        # \Z rather than $: "$" also matches just before a trailing newline,
        # which would let a string such as 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
8c25f81b
PH
295
296
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """shlex.split() that first encodes unicode input as UTF-8."""
        encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(encoded, comments, posix)
else:
    compat_shlex_split = shlex.split
306
307
def compat_ord(c):
    """Return *c* unchanged when it is exactly an int, otherwise ord(c).

    Lets callers handle both integer items and one-character strings
    the same way.
    """
    return c if type(c) is int else ord(c)
8c25f81b
PH
313
314
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable *key* decoded with the filesystem
        encoding, or *default* when it is unset.

        Empty or missing values are returned as-is without decoding.
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Plain "~": prefer $HOME, else the password database
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the named user up in the password database
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() returns None for a missing variable rather
                # than raising KeyError, so fall back to '' explicitly;
                # os.path.join() cannot take None.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
389
390
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string *s*."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print *s* encoded with the preferred encoding; characters that
        cannot be encoded become XML character references."""
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
399
400
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:  # subprocess.check_output is missing (Python 2.6)
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Arguments are passed to subprocess.Popen.  Raises
        subprocess.CalledProcessError, with the captured output in its
        ``output`` attribute, when the command exits with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Interpreters that need this fallback have neither Popen.args
            # nor an ``output`` argument on CalledProcessError, so recover
            # the command from the call arguments and attach the output
            # afterwards (as CPython 2.7's check_output does).
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
412
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() that encodes a unicode prompt with the
        preferred encoding first."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
421
c7b0add8
PH
# Old 2.6 and 2.7 releases require kwargs to be bytes
def _testfunc(x):
    pass


try:
    # Probe: does this interpreter accept these keys as keyword names?
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* whose keys are converted with bytes()."""
        return dict((bytes(name), value) for name, value in kwargs.items())
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged."""
        return kwargs
8c25f81b 432
e07e9313 433
be4a824d
PH
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to *address* (a (host, port) pair) and return the socket.

        Each result of getaddrinfo() is tried in turn; the first successful
        connection wins.  If all fail, the last socket.error is raised.
        """
        host, port = address
        last_error = None
        for family, kind, proto, _canonname, sockaddr in socket.getaddrinfo(
                host, port, 0, socket.SOCK_STREAM):
            sock = None
            try:
                sock = socket.socket(family, kind, proto)
                sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sockaddr)
                return sock
            except socket.error as exc:
                last_error = exc
                if sock is not None:
                    sock.close()
        if last_error is None:
            raise socket.error("getaddrinfo returns an empty list")
        raise last_error
458
459
e07e9313
PH
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text arguments.

    A throwaway parser is used to probe for the TypeError; only when it
    occurs is add_option replaced by a wrapper that ASCII-encodes text
    arguments before delegating to the original method.
    """
    probe_parser = optparse.OptionParser()
    probe_group = optparse.OptionGroup(probe_parser, 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        original_add_option = optparse.OptionGroup.add_option

        def _encode(value):
            if isinstance(value, compat_str):
                return value.encode('ascii', 'replace')
            return value

        def _compat_add_option(self, *args, **kwargs):
            encoded_args = [_encode(arg) for arg in args]
            encoded_kwargs = dict(
                (key, _encode(val)) for key, val in kwargs.items())
            return original_add_option(self, *encoded_args, **encoded_kwargs)
        optparse.OptionGroup.add_option = _compat_add_option
479
003c69a8
JMF
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return the positive integer held by environment variable *name*,
        or None when it is unset, empty, non-numeric or not positive."""
        raw = compat_getenv(name)
        if not raw:
            return None
        try:
            value = int(raw)
        except ValueError:
            # A garbage value is treated like an unset one, as
            # shutil.get_terminal_size() does, rather than crashing
            return None
        return value if value > 0 else None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a (columns, lines) named tuple.

        $COLUMNS and $LINES take precedence.  Whatever they do not provide
        is taken from ``stty size``; if that fails, from *fallback*.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<rows> <columns>"
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None:
                columns = _columns
            if lines is None:
                lines = _lines
        return _terminal_size(columns, lines)
512
a0e060ac
YCH
try:
    # Probe for keyword/step support in itertools.count
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        value = start
        while True:
            yield value
            value += step
else:
    compat_itertools_count = itertools.count
e07e9313 522
67134eab
JMF
523if sys.version_info >= (3, 0):
524 from tokenize import tokenize as compat_tokenize_tokenize
525else:
526 from tokenize import generate_tokens as compat_tokenize_tokenize
e07e9313 527
8c25f81b
PH
528__all__ = [
529 'compat_HTTPError',
0196149c 530 'compat_basestring',
8c25f81b
PH
531 'compat_chr',
532 'compat_cookiejar',
799207e8 533 'compat_cookies',
36e6f62c 534 'compat_etree_fromstring',
8c25f81b 535 'compat_expanduser',
003c69a8 536 'compat_get_terminal_size',
8c25f81b
PH
537 'compat_getenv',
538 'compat_getpass',
539 'compat_html_entities',
8c25f81b 540 'compat_http_client',
83fda3c0 541 'compat_http_server',
a0e060ac 542 'compat_itertools_count',
c7b0add8 543 'compat_kwargs',
8c25f81b
PH
544 'compat_ord',
545 'compat_parse_qs',
546 'compat_print',
51f579b6 547 'compat_shlex_split',
be4a824d 548 'compat_socket_create_connection',
987493ae 549 'compat_str',
8c25f81b 550 'compat_subprocess_get_DEVNULL',
67134eab 551 'compat_tokenize_tokenize',
8c25f81b
PH
552 'compat_urllib_error',
553 'compat_urllib_parse',
554 'compat_urllib_parse_unquote',
aa99aa4e 555 'compat_urllib_parse_unquote_plus',
9fefc886 556 'compat_urllib_parse_unquote_to_bytes',
8c25f81b
PH
557 'compat_urllib_parse_urlparse',
558 'compat_urllib_request',
0a67a363
YCH
559 'compat_urllib_request_DataHandler',
560 'compat_urllib_response',
8c25f81b
PH
561 'compat_urlparse',
562 'compat_urlretrieve',
563 'compat_xml_parse_error',
564 'shlex_quote',
565 'subprocess_check_output',
e07e9313 566 'workaround_optparse_bug9161',
8c25f81b 567]