]> jfr.im git - yt-dlp.git/blame - youtube_dl/compat.py
[jython] Introduce compat_os_name
[yt-dlp.git] / youtube_dl / compat.py
CommitLineData
451948b2
PH
1from __future__ import unicode_literals
2
0a67a363 3import binascii
003c69a8 4import collections
0a67a363 5import email
8c25f81b 6import getpass
0a67a363 7import io
e07e9313 8import optparse
8c25f81b 9import os
7d4111ed 10import re
51f579b6 11import shlex
003c69a8 12import shutil
be4a824d 13import socket
8c25f81b
PH
14import subprocess
15import sys
a0e060ac 16import itertools
36e6f62c 17import xml.etree.ElementTree
8c25f81b
PH
18
19
20try:
21 import urllib.request as compat_urllib_request
5f6a1245 22except ImportError: # Python 2
8c25f81b
PH
23 import urllib2 as compat_urllib_request
24
25try:
26 import urllib.error as compat_urllib_error
5f6a1245 27except ImportError: # Python 2
8c25f81b
PH
28 import urllib2 as compat_urllib_error
29
30try:
31 import urllib.parse as compat_urllib_parse
5f6a1245 32except ImportError: # Python 2
8c25f81b
PH
33 import urllib as compat_urllib_parse
34
35try:
36 from urllib.parse import urlparse as compat_urllib_parse_urlparse
5f6a1245 37except ImportError: # Python 2
8c25f81b
PH
38 from urlparse import urlparse as compat_urllib_parse_urlparse
39
40try:
41 import urllib.parse as compat_urlparse
5f6a1245 42except ImportError: # Python 2
8c25f81b
PH
43 import urlparse as compat_urlparse
44
0a67a363
YCH
45try:
46 import urllib.response as compat_urllib_response
47except ImportError: # Python 2
48 import urllib as compat_urllib_response
49
8c25f81b
PH
50try:
51 import http.cookiejar as compat_cookiejar
5f6a1245 52except ImportError: # Python 2
8c25f81b
PH
53 import cookielib as compat_cookiejar
54
799207e8 55try:
56 import http.cookies as compat_cookies
57except ImportError: # Python 2
58 import Cookie as compat_cookies
59
8c25f81b
PH
60try:
61 import html.entities as compat_html_entities
5f6a1245 62except ImportError: # Python 2
8c25f81b
PH
63 import htmlentitydefs as compat_html_entities
64
8c25f81b
PH
65try:
66 import http.client as compat_http_client
5f6a1245 67except ImportError: # Python 2
8c25f81b
PH
68 import httplib as compat_http_client
69
70try:
71 from urllib.error import HTTPError as compat_HTTPError
72except ImportError: # Python 2
73 from urllib2 import HTTPError as compat_HTTPError
74
75try:
76 from urllib.request import urlretrieve as compat_urlretrieve
77except ImportError: # Python 2
78 from urllib import urlretrieve as compat_urlretrieve
79
80
# compat_subprocess_get_DEVNULL() returns something usable as a
# "discard everything" stdout/stderr target for subprocess calls.
try:
    from subprocess import DEVNULL
except ImportError:
    # subprocess has no DEVNULL here: open a new writable handle on the
    # null device on every call instead.
    def compat_subprocess_get_DEVNULL():
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        return DEVNULL
86
83fda3c0
PH
87try:
88 import http.server as compat_http_server
89except ImportError:
90 import BaseHTTPServer as compat_http_server
91
953fed28
PH
# compat_str is the text type: the ``unicode`` builtin where it exists
# (Python 2), plain ``str`` otherwise.
try:
    unicode
except NameError:
    compat_str = str
else:
    compat_str = unicode  # Python 2
96
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Reuse the stdlib's pre-compiled ASCII-run pattern when it has one.
    if hasattr(compat_urllib_parse, '_asciire'):
        _asciire = compat_urllib_parse._asciire
    else:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """Decode %xx escapes into raw bytes.

        unquote_to_bytes('abc%20def') -> b'abc def'.

        Text input is encoded as UTF-8 first; that only matters when it
        contains unescaped non-ASCII characters, which URIs should not.
        """
        if not string:
            # Attribute probe: fails loudly for objects that are not string-like.
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        pieces = string.split(b'%')
        if len(pieces) == 1:
            # No escape present at all.
            return string
        hex_table = compat_urllib_parse._hextochr
        out = [pieces[0]]
        for piece in pieces[1:]:
            try:
                decoded = hex_table[piece[:2]]
            except KeyError:
                # Not a valid two-digit escape: keep the text literally.
                out.append(b'%')
                out.append(piece)
            else:
                out.append(decoded)
                out.append(piece[2:])
        return b''.join(out)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by the characters they encode.

        ``encoding`` and ``errors`` are handed to bytes.decode() for the
        percent-encoded runs; ``None`` for either selects the default
        ('utf-8' / 'replace'), so invalid sequences become a placeholder
        character by default.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Attribute probe: fails loudly for objects that are not string-like.
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # Splitting on a capturing group puts the ASCII runs (the only
        # places an escape can live) at the odd indices.
        chunks = _asciire.split(string)
        out = [chunks[0]]
        for idx in range(1, len(chunks), 2):
            raw = compat_urllib_parse_unquote_to_bytes(chunks[idx])
            out.append(raw.decode(encoding, errors))
            out.append(chunks[idx + 1])
        return ''.join(out)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but plus signs are turned into spaces first, as
        needed for HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(
            string.replace('+', ' '), encoding, errors)
166
0a67a363
YCH
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """urllib handler for ``data:`` URLs (RFC 2397)."""

        def data_open(self, req):
            """Build a response object from the payload embedded in the URL.

            POSTed data is ignored. URL syntax:

                dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
                mediatype := [ type "/" subtype ] *( ";" parameter )
                data      := *urlchar
                parameter := attribute "=" value
            """
            full_url = req.get_full_url()

            _scheme, remainder = full_url.split(':', 1)
            media_type, payload = remainder.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            base64_marker = ';base64'
            if media_type.endswith(base64_marker):
                payload = binascii.a2b_base64(payload)
                media_type = media_type[:-len(base64_marker)]

            media_type = media_type or 'text/plain;charset=US-ASCII'

            header_text = 'Content-type: %s\nContent-length: %d\n' % (
                media_type, len(payload))
            headers = email.message_from_string(header_text)

            return compat_urllib_response.addinfourl(
                io.BytesIO(payload), headers, full_url)
200
# compat_basestring is the ``basestring`` builtin where it exists
# (Python 2), plain ``str`` otherwise.
try:
    basestring
except NameError:
    compat_basestring = str
else:
    compat_basestring = basestring  # Python 2
8f9312c3
PH
205
# compat_chr is the ``unichr`` builtin where it exists (Python 2),
# plain ``chr`` otherwise.
try:
    unichr
except NameError:
    compat_chr = chr
else:
    compat_chr = unichr  # Python 2
210
211try:
212 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
213except ImportError: # Python 2.6
214 from xml.parsers.expat import ExpatError as compat_xml_parse_error
215
36e6f62c
JMF
if sys.version_info[0] >= 3:
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    etree = xml.etree.ElementTree

    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield ``root`` and then every descendant, in document order.

            Stand-in for Element.iter(), which also starts with the element
            it is called on; leaving the root out would skip it in the text
            decoding pass of compat_etree_fromstring.
            """
            yield root
            for el in root.findall('*'):
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Parse ``text`` with ``parser`` (a default XMLParser when omitted)
        and return whatever the parser's close() returns."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element and decode any byte-string attribute values
        as UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse XML from ``text`` and return the root element, with
        byte-string attribute values and element texts decoded as UTF-8."""
        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
8c25f81b
PH
253
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of decoded (name, value) pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless ``keep_blank_values`` is true. With
        ``strict_parsing`` a field lacking '=' raises ValueError.
        """
        def _decode(component):
            # '+' means space in form encoding; then undo the %xx escapes.
            unquoted = compat_urllib_parse_unquote(
                component.replace('+', ' '), encoding=encoding, errors=errors)
            return compat_str(unquoted)

        decoded_pairs = []
        for amp_chunk in qs.split('&'):
            for field in amp_chunk.split(';'):
                if not field and not strict_parsing:
                    continue
                parts = field.split('=', 1)
                if len(parts) != 2:
                    if strict_parsing:
                        raise ValueError('bad query field: %r' % (field,))
                    # Handle case of a control-name with no equal sign
                    if not keep_blank_values:
                        continue
                    parts.append('')
                if keep_blank_values or len(parts[1]):
                    field_name = _decode(parts[0])
                    field_value = _decode(parts[1])
                    decoded_pairs.append((field_name, field_value))
        return decoded_pairs

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to the list
        of its values, in order of appearance."""
        grouped = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            grouped.setdefault(name, []).append(value)
        return grouped
300
8c25f81b
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return ``s`` escaped for use as a single shell argument.

        Strings made only of word characters, '-', '_', '.' and '/' are
        returned unchanged; anything else (including the empty string) is
        wrapped in single quotes, with embedded single quotes spliced in
        via '"'"'.
        """
        # \Z instead of $: "$" also matches right before a trailing
        # newline, which would let e.g. 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
8c25f81b
PH
309
310
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """shlex.split() that first encodes text input as UTF-8 bytes."""
        encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(encoded, comments, posix)
else:
    compat_shlex_split = shlex.split
320
321
def compat_ord(c):
    """Return the code point of ``c``.

    Values that are exactly of type int are passed through unchanged;
    everything else is handed to ord().
    """
    # Exact type check on purpose: bool and other int subclasses go to ord().
    return c if type(c) is int else ord(c)
8c25f81b
PH
327
328
e9c0cdd3
YCH
# When os.name is 'java' (Jython), use os._name instead; everywhere else
# compat_os_name is simply os.name.
if os.name == 'java':
    compat_os_name = os._name
else:
    compat_os_name = os.name
330
331
8c25f81b
PH
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable ``key`` decoded with the filesystem
        encoding.

        Falsy results (variable unset with a falsy ``default``, or an empty
        value) are returned as they are, without decoding.
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if compat_os_name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i marks the end of the "~" / "~user" prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the named user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif compat_os_name == 'nt' or compat_os_name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i marks the end of the "~" / "~user" prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() returns None for an unset variable (it
                # never raises KeyError), so fall back to '' explicitly
                # instead of passing None on to os.path.join().
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
406
407
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string ``s``; anything that is not a compat_str
        trips the assertion."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print ``s`` encoded with the preferred encoding; characters that
        cannot be encoded are emitted as XML character references."""
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
416
417
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:  # no subprocess.check_output (Python 2.6)
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Positional and keyword arguments are forwarded to subprocess.Popen;
        ``stdout`` is always a pipe and must not be passed in.

        Raises subprocess.CalledProcessError (with the captured output in
        its ``output`` attribute) when the command exits non-zero.
        """
        assert 'input' not in kwargs
        if 'stdout' in kwargs:
            raise ValueError('stdout argument not allowed, it will be overridden.')
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Popen objects have no ``args`` attribute on the interpreters
            # that need this fallback, so recover the command from the
            # call arguments instead.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            # CalledProcessError does not take ``output`` as a constructor
            # argument there either; attach it afterwards.
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
429
if sys.platform == 'win32' and sys.version_info < (3, 0):
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() that first encodes a text prompt with the
        preferred encoding; all other arguments are passed through."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            encoding = preferredencoding()
            prompt = prompt.encode(encoding)
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
438
# Python < 2.6.5 require kwargs to be bytes
def _testfunc(x):
    pass


# Probe: does this interpreter accept the module's default string literals
# as keyword-argument names?
try:
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of ``kwargs`` whose keys are converted with bytes()."""
        return dict((bytes(key), value) for key, value in kwargs.items())
else:
    def compat_kwargs(kwargs):
        """Return ``kwargs`` itself; no key conversion is needed here."""
        return kwargs
8c25f81b 449
e07e9313 450
be4a824d
PH
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to ``address`` (a ``(host, port)`` pair) and return the socket.

        Every address returned by getaddrinfo() is tried in turn. Each
        socket gets ``timeout`` applied and, when ``source_address`` is
        truthy, is bound to it before connecting. If no attempt succeeds
        the last socket.error is re-raised; an empty getaddrinfo() result
        raises socket.error as well.
        """
        host, port = address
        last_error = None
        for candidate in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            family, kind, protocol, _canonname, sockaddr = candidate
            conn = None
            try:
                conn = socket.socket(family, kind, protocol)
                conn.settimeout(timeout)
                if source_address:
                    conn.bind(source_address)
                conn.connect(sockaddr)
                return conn
            except socket.error as exc:
                last_error = exc
                # Do not leak the half-set-up socket before trying the next one.
                if conn is not None:
                    conn.close()
        if last_error is None:
            raise socket.error('getaddrinfo returns an empty list')
        raise last_error
475
476
e07e9313
PH
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text arguments.

    A throwaway option group is probed with add_option('-t'). Only when
    that raises TypeError is OptionGroup.add_option replaced by a wrapper
    that ASCII-encodes (with 'replace') every text positional argument and
    keyword value before delegating to the original method.
    """
    probe_parser = optparse.OptionParser()
    probe_group = optparse.OptionGroup(probe_parser, 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        pass
    else:
        # add_option copes with text arguments: nothing to patch.
        return

    original_add_option = optparse.OptionGroup.add_option

    def _to_ascii_bytes(value):
        if isinstance(value, compat_str):
            return value.encode('ascii', 'replace')
        return value

    def _compat_add_option(self, *args, **kwargs):
        byte_args = [_to_ascii_bytes(arg) for arg in args]
        byte_kwargs = dict(
            (key, _to_ascii_bytes(val)) for key, val in kwargs.items())
        return original_add_option(self, *byte_args, **byte_kwargs)

    optparse.OptionGroup.add_option = _compat_add_option
496
003c69a8
JMF
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return environment variable ``name`` as an int, or None when it
        is unset, empty or not a valid integer."""
        try:
            return int(compat_getenv(name))
        except (TypeError, ValueError):
            # TypeError: variable unset (None); ValueError: empty or junk.
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        COLUMNS and LINES from the environment win when they hold positive
        integers. Any dimension still missing is taken from the output of
        ``stty size``, and from ``fallback`` when that command cannot be
        run or parsed.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<rows> <columns>".
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
529
a0e060ac
YCH
# itertools.count() only accepts the ``start``/``step`` keywords on newer
# interpreters; probe for them and fall back to a plain generator.
try:
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... forever."""
        current = start
        while True:
            yield current
            current += step
else:
    compat_itertools_count = itertools.count
e07e9313 539
67134eab
JMF
540if sys.version_info >= (3, 0):
541 from tokenize import tokenize as compat_tokenize_tokenize
542else:
543 from tokenize import generate_tokens as compat_tokenize_tokenize
e07e9313 544
8c25f81b
PH
545__all__ = [
546 'compat_HTTPError',
0196149c 547 'compat_basestring',
8c25f81b
PH
548 'compat_chr',
549 'compat_cookiejar',
799207e8 550 'compat_cookies',
36e6f62c 551 'compat_etree_fromstring',
8c25f81b 552 'compat_expanduser',
003c69a8 553 'compat_get_terminal_size',
8c25f81b
PH
554 'compat_getenv',
555 'compat_getpass',
556 'compat_html_entities',
8c25f81b 557 'compat_http_client',
83fda3c0 558 'compat_http_server',
a0e060ac 559 'compat_itertools_count',
c7b0add8 560 'compat_kwargs',
8c25f81b 561 'compat_ord',
e9c0cdd3 562 'compat_os_name',
8c25f81b
PH
563 'compat_parse_qs',
564 'compat_print',
51f579b6 565 'compat_shlex_split',
be4a824d 566 'compat_socket_create_connection',
987493ae 567 'compat_str',
8c25f81b 568 'compat_subprocess_get_DEVNULL',
67134eab 569 'compat_tokenize_tokenize',
8c25f81b
PH
570 'compat_urllib_error',
571 'compat_urllib_parse',
572 'compat_urllib_parse_unquote',
aa99aa4e 573 'compat_urllib_parse_unquote_plus',
9fefc886 574 'compat_urllib_parse_unquote_to_bytes',
8c25f81b
PH
575 'compat_urllib_parse_urlparse',
576 'compat_urllib_request',
0a67a363
YCH
577 'compat_urllib_request_DataHandler',
578 'compat_urllib_response',
8c25f81b
PH
579 'compat_urlparse',
580 'compat_urlretrieve',
581 'compat_xml_parse_error',
582 'shlex_quote',
583 'subprocess_check_output',
e07e9313 584 'workaround_optparse_bug9161',
8c25f81b 585]