]> jfr.im git - yt-dlp.git/blame - youtube_dl/compat.py
[compat] compat_etree_fromstring: also decode the text attribute
[yt-dlp.git] / youtube_dl / compat.py
CommitLineData
451948b2
PH
1from __future__ import unicode_literals
2
0a67a363 3import binascii
003c69a8 4import collections
0a67a363 5import email
8c25f81b 6import getpass
0a67a363 7import io
e07e9313 8import optparse
8c25f81b 9import os
7d4111ed 10import re
51f579b6 11import shlex
003c69a8 12import shutil
be4a824d 13import socket
8c25f81b
PH
14import subprocess
15import sys
a0e060ac 16import itertools
36e6f62c 17import xml.etree.ElementTree
8c25f81b
PH
18
19
20try:
21 import urllib.request as compat_urllib_request
5f6a1245 22except ImportError: # Python 2
8c25f81b
PH
23 import urllib2 as compat_urllib_request
24
25try:
26 import urllib.error as compat_urllib_error
5f6a1245 27except ImportError: # Python 2
8c25f81b
PH
28 import urllib2 as compat_urllib_error
29
30try:
31 import urllib.parse as compat_urllib_parse
5f6a1245 32except ImportError: # Python 2
8c25f81b
PH
33 import urllib as compat_urllib_parse
34
35try:
36 from urllib.parse import urlparse as compat_urllib_parse_urlparse
5f6a1245 37except ImportError: # Python 2
8c25f81b
PH
38 from urlparse import urlparse as compat_urllib_parse_urlparse
39
40try:
41 import urllib.parse as compat_urlparse
5f6a1245 42except ImportError: # Python 2
8c25f81b
PH
43 import urlparse as compat_urlparse
44
0a67a363
YCH
45try:
46 import urllib.response as compat_urllib_response
47except ImportError: # Python 2
48 import urllib as compat_urllib_response
49
8c25f81b
PH
50try:
51 import http.cookiejar as compat_cookiejar
5f6a1245 52except ImportError: # Python 2
8c25f81b
PH
53 import cookielib as compat_cookiejar
54
799207e8 55try:
56 import http.cookies as compat_cookies
57except ImportError: # Python 2
58 import Cookie as compat_cookies
59
8c25f81b
PH
60try:
61 import html.entities as compat_html_entities
5f6a1245 62except ImportError: # Python 2
8c25f81b
PH
63 import htmlentitydefs as compat_html_entities
64
8c25f81b
PH
65try:
66 import http.client as compat_http_client
5f6a1245 67except ImportError: # Python 2
8c25f81b
PH
68 import httplib as compat_http_client
69
70try:
71 from urllib.error import HTTPError as compat_HTTPError
72except ImportError: # Python 2
73 from urllib2 import HTTPError as compat_HTTPError
74
75try:
76 from urllib.request import urlretrieve as compat_urlretrieve
77except ImportError: # Python 2
78 from urllib import urlretrieve as compat_urlretrieve
79
80
try:
    from subprocess import DEVNULL
except ImportError:  # subprocess.DEVNULL is not available on this interpreter
    def compat_subprocess_get_DEVNULL():
        """Open the platform's null device for writing and return the handle."""
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the subprocess.DEVNULL sentinel."""
        return DEVNULL
86
83fda3c0
PH
87try:
88 import http.server as compat_http_server
89except ImportError:
90 import BaseHTTPServer as compat_http_server
91
953fed28
PH
# compat_str is the text (unicode) string type of the running interpreter.
try:
    unicode
except NameError:  # no ``unicode`` builtin: the text type is ``str``
    compat_str = str
else:  # Python 2
    compat_str = unicode
96
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Reuse the stdlib's precompiled ASCII-run pattern when it exists
    if hasattr(compat_urllib_parse, '_asciire'):
        _asciire = compat_urllib_parse._asciire
    else:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """Decode %xx escapes and return a byte string.

        unquote_to_bytes('abc%20def') -> b'abc def'.
        """
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Bare attribute access: fails loudly for non string-like input
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        chunks = string.split(b'%')
        if len(chunks) == 1:
            # No '%' present, nothing to decode
            return string
        hex_table = compat_urllib_parse._hextochr
        pieces = [chunks[0]]
        for chunk in chunks[1:]:
            try:
                decoded = hex_table[chunk[:2]]
            except KeyError:
                # Not a valid %xx escape: keep the text verbatim
                pieces.extend((b'%', chunk))
            else:
                pieces.extend((decoded, chunk[2:]))
        return b''.join(pieces)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by the characters they encode.

        encoding and errors are handed to bytes.decode() for the
        percent-encoded runs; None selects the defaults ('utf-8' and
        'replace', i.e. invalid sequences become a placeholder character).

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Bare attribute access: fails loudly for non string-like input
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # Splitting on a capturing group alternates: other, ASCII run, other, ...
        parts = _asciire.split(string)
        pieces = [parts[0]]
        for ascii_run, other in zip(parts[1::2], parts[2::2]):
            pieces.append(
                compat_urllib_parse_unquote_to_bytes(ascii_run).decode(encoding, errors))
            pieces.append(other)
        return ''.join(pieces)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but plus signs are turned into spaces first, as
        required for unquoting HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(
            string.replace('+', ' '), encoding, errors)
166
0a67a363
YCH
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """urllib handler for ``data:`` URLs (RFC 2397)."""

        def data_open(self, req):
            """Build a response object from the payload embedded in req's URL.

            POSTed data is ignored. Syntax handled:

                dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
                mediatype := [ type "/" subtype ] *( ";" parameter )
                data      := *urlchar
                parameter := attribute "=" value
            """
            full_url = req.get_full_url()

            _scheme, remainder = full_url.split(":", 1)
            media, payload = remainder.split(",", 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            if media.endswith(";base64"):
                payload = binascii.a2b_base64(payload)
                # strip the trailing ";base64" marker
                media = media[:-7]

            media = media or "text/plain;charset=US-ASCII"

            header_text = "Content-type: %s\nContent-length: %d\n" % (media, len(payload))
            headers = email.message_from_string(header_text)

            return compat_urllib_response.addinfourl(io.BytesIO(payload), headers, full_url)
200
# compat_basestring is ``basestring`` where that builtin exists, else ``str``.
try:
    basestring
except NameError:  # no ``basestring`` builtin
    compat_basestring = str
else:  # Python 2
    compat_basestring = basestring
8f9312c3
PH
205
# compat_chr is ``unichr`` where that builtin exists, else ``chr``.
try:
    unichr
except NameError:  # no ``unichr`` builtin
    compat_chr = chr
else:  # Python 2
    compat_chr = unichr
210
211try:
212 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
213except ImportError: # Python 2.6
214 from xml.parsers.expat import ExpatError as compat_xml_parse_error
215
36e6f62c
JMF
if sys.version_info[0] >= 3:
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # on python 2.x the attributes and text of a node aren't always unicode
    # objects, so the parsed tree is post-processed to decode byte strings
    etree = xml.etree.ElementTree

    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield root followed by all of its descendants.

            Element.iter() starts with the element itself; the fallback has
            to do the same, otherwise the root node's text is never decoded.
            """
            yield root
            for el in root.findall('*'):
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Parse text with parser (a default XMLParser when omitted)."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element whose byte-string attribute values are decoded as UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse an XML document from text and return its root element.

        Attribute values and node text that come back as byte strings are
        decoded as UTF-8.
        """
        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            # isinstance() is already False for None
            if isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
8c25f81b
PH
253
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string qs into a list of decoded (name, value) pairs."""
        _coerce_result = compat_str

        def _decode(component):
            # '+' stands for a space in form-encoded data
            text = compat_urllib_parse_unquote(
                component.replace('+', ' '), encoding=encoding, errors=errors)
            return _coerce_result(text)

        # Fields may be separated by '&' as well as by ';'
        fields = []
        for amp_part in qs.split('&'):
            fields.extend(amp_part.split(';'))

        result = []
        for field in fields:
            if not field and not strict_parsing:
                continue
            parts = field.split('=', 1)
            if len(parts) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                parts.append('')
            if not (len(parts[1]) or keep_blank_values):
                continue
            key = _decode(parts[0])
            val = _decode(parts[1])
            result.append((key, val))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string qs into a dict mapping each name to its list of values."""
        collected = {}
        for key, val in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                   encoding=encoding, errors=errors):
            collected.setdefault(key, []).append(val)
        return collected
300
8c25f81b
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return s quoted so that a POSIX shell reads it back as one token.

        Strings made only of word characters, '-', '_', '.' and '/' are
        returned unchanged; anything else is wrapped in single quotes, with
        embedded single quotes spelled as '"'"'.
        """
        # \Z instead of $: '$' also matches right before a trailing newline,
        # which would let a string ending in a newline through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
8c25f81b
PH
309
310
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """shlex.split() that first encodes text strings to UTF-8 bytes."""
        raw = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(raw, comments, posix)
else:
    compat_shlex_split = shlex.split
320
321
def compat_ord(c):
    """Return c unchanged when it is exactly an int, otherwise ord(c)."""
    return c if type(c) is int else ord(c)
8c25f81b
PH
327
328
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable key decoded with the filesystem
        encoding, or default when it is not set."""
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        # Decode byte strings only: a unicode default supplied by the caller
        # must be returned untouched (calling .decode() on it would first
        # try to encode it as ASCII).
        if isinstance(env, bytes):
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i is the index just past the "~" / "~user" prefix
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Plain "~": current user's home
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the user up in the password database
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i is the index just past the "~" / "~user" prefix
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() returns None for an unset variable instead
                # of raising KeyError, so fall back to '' explicitly.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
403
404
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print s encoded with the preferred encoding; characters that
        cannot be encoded are written as XML character references."""
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
413
414
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:  # Python 2.6
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Accepts the arguments of subprocess.Popen (except ``stdout``, which
        is set here). Raises subprocess.CalledProcessError, carrying the
        captured output in its ``output`` attribute, when the command exits
        with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Popen has no ``args`` attribute on the interpreters that need
            # this fallback; recover the command from the call arguments.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            # CalledProcessError does not accept an ``output`` keyword there
            # either, so attach the output after construction.
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
426
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() wrapper that encodes a text prompt with the
        preferred encoding before passing it on."""
        if not isinstance(prompt, compat_str):
            return getpass.getpass(prompt, *args, **kwargs)
        from .utils import preferredencoding
        encoded_prompt = prompt.encode(preferredencoding())
        return getpass.getpass(encoded_prompt, *args, **kwargs)
435
c7b0add8
PH
# Old 2.6 and 2.7 releases require kwargs to be bytes
def _testfunc(x):
    pass


try:
    # Probe: does ** expansion accept text-string keys here?
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of kwargs whose keys are converted with bytes()."""
        converted = {}
        for key, value in kwargs.items():
            converted[bytes(key)] = value
        return converted
else:
    def compat_kwargs(kwargs):
        """Return kwargs unchanged."""
        return kwargs
8c25f81b 446
e07e9313 447
be4a824d
PH
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to address, a (host, port) pair, and return the socket.

        Every address returned by getaddrinfo() is tried in turn; if none
        succeeds, the error of the last attempt is raised.
        """
        host, port = address
        last_error = None
        for af, socktype, proto, canonname, sa in socket.getaddrinfo(
                host, port, 0, socket.SOCK_STREAM):
            candidate = None
            try:
                candidate = socket.socket(af, socktype, proto)
                candidate.settimeout(timeout)
                if source_address:
                    candidate.bind(source_address)
                candidate.connect(sa)
                return candidate
            except socket.error as exc:
                last_error = exc
                # Release the half-open socket before trying the next address
                if candidate is not None:
                    candidate.close()
        if last_error is None:
            raise socket.error("getaddrinfo returns an empty list")
        raise last_error
472
473
e07e9313
PH
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text-string options.

    A throw-away option group is probed first; when add_option() raises
    TypeError, the method is replaced by a wrapper that ASCII-encodes every
    text-string argument before delegating to the original.
    """
    probe_group = optparse.OptionGroup(optparse.OptionParser(), 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        pass
    else:
        # optparse works as is, nothing to patch
        return

    real_add_option = optparse.OptionGroup.add_option

    def _encode(value):
        if isinstance(value, compat_str):
            return value.encode('ascii', 'replace')
        return value

    def _compat_add_option(self, *args, **kwargs):
        encoded_args = [_encode(arg) for arg in args]
        encoded_kwargs = dict(
            (name, _encode(value)) for name, value in kwargs.items())
        return real_add_option(self, *encoded_args, **encoded_kwargs)

    optparse.OptionGroup.add_option = _compat_add_option
493
003c69a8
JMF
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _env_dimension(name):
        """Return environment variable name as an int, or None when it is
        unset, empty or not a valid integer."""
        value = compat_getenv(name)
        if not value:
            return None
        try:
            return int(value)
        except ValueError:
            # A malformed COLUMNS/LINES value must not crash the caller
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a (columns, lines) named tuple.

        The COLUMNS and LINES environment variables are consulted first;
        for any dimension that is missing, malformed or not positive, the
        output of ``stty size`` is used, and fallback if that fails too.
        """
        columns = _env_dimension('COLUMNS')
        lines = _env_dimension('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<lines> <columns>"
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
526
a0e060ac
YCH
try:
    # Probe: does itertools.count() accept the ``step`` keyword?
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        current = start
        while True:
            yield current
            current += step
else:
    compat_itertools_count = itertools.count
e07e9313 536
67134eab
JMF
537if sys.version_info >= (3, 0):
538 from tokenize import tokenize as compat_tokenize_tokenize
539else:
540 from tokenize import generate_tokens as compat_tokenize_tokenize
e07e9313 541
8c25f81b
PH
# Names exported by ``from <this module> import *``; every entry is defined
# above, either as an alias of a standard-library object or as a fallback
# implementation.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_etree_fromstring',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]