]> jfr.im git - yt-dlp.git/blame - youtube_dl/compat.py
[veehd] Use compat_urllib_parse_unquote
[yt-dlp.git] / youtube_dl / compat.py
CommitLineData
451948b2
PH
1from __future__ import unicode_literals
2
003c69a8 3import collections
8c25f81b 4import getpass
e07e9313 5import optparse
8c25f81b 6import os
7d4111ed 7import re
003c69a8 8import shutil
be4a824d 9import socket
8c25f81b
PH
10import subprocess
11import sys
a0e060ac 12import itertools
8c25f81b
PH
13
14
15try:
16 import urllib.request as compat_urllib_request
5f6a1245 17except ImportError: # Python 2
8c25f81b
PH
18 import urllib2 as compat_urllib_request
19
20try:
21 import urllib.error as compat_urllib_error
5f6a1245 22except ImportError: # Python 2
8c25f81b
PH
23 import urllib2 as compat_urllib_error
24
25try:
26 import urllib.parse as compat_urllib_parse
5f6a1245 27except ImportError: # Python 2
8c25f81b
PH
28 import urllib as compat_urllib_parse
29
30try:
31 from urllib.parse import urlparse as compat_urllib_parse_urlparse
5f6a1245 32except ImportError: # Python 2
8c25f81b
PH
33 from urlparse import urlparse as compat_urllib_parse_urlparse
34
35try:
36 import urllib.parse as compat_urlparse
5f6a1245 37except ImportError: # Python 2
8c25f81b
PH
38 import urlparse as compat_urlparse
39
40try:
41 import http.cookiejar as compat_cookiejar
5f6a1245 42except ImportError: # Python 2
8c25f81b
PH
43 import cookielib as compat_cookiejar
44
45try:
46 import html.entities as compat_html_entities
5f6a1245 47except ImportError: # Python 2
8c25f81b
PH
48 import htmlentitydefs as compat_html_entities
49
8c25f81b
PH
50try:
51 import http.client as compat_http_client
5f6a1245 52except ImportError: # Python 2
8c25f81b
PH
53 import httplib as compat_http_client
54
55try:
56 from urllib.error import HTTPError as compat_HTTPError
57except ImportError: # Python 2
58 from urllib2 import HTTPError as compat_HTTPError
59
60try:
61 from urllib.request import urlretrieve as compat_urlretrieve
62except ImportError: # Python 2
63 from urllib import urlretrieve as compat_urlretrieve
64
65
try:
    from subprocess import DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a fresh writable file object opened on the null device."""
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the subprocess.DEVNULL sentinel."""
        return DEVNULL
71
83fda3c0
PH
72try:
73 import http.server as compat_http_server
74except ImportError:
75 import BaseHTTPServer as compat_http_server
76
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """Decode %xx escapes in *string* and return a byte string.

        unquote_to_bytes('abc%20def') -> b'abc def'.
        """
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Bare attribute access: raises AttributeError for falsy values
            # that are not string-like.
            string.split
            return b''
        if isinstance(string, unicode):
            string = string.encode('utf-8')
        segments = string.split(b'%')
        if len(segments) == 1:
            # No percent sign at all, nothing to decode.
            return string
        pieces = [segments[0]]
        for segment in segments[1:]:
            try:
                byte = compat_urllib_parse._hextochr[segment[:2]]
            except KeyError:
                # Not a valid two-digit hex escape: keep the text verbatim.
                pieces.append(b'%')
                pieces.append(segment)
            else:
                pieces.append(byte)
                pieces.append(segment[2:])
        return b''.join(pieces)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Return *string* with its %xx escapes decoded to text.

        *encoding* and *errors* are handed to the decode() call applied to
        the unescaped bytes; passing None for either one selects 'utf-8' and
        'replace' respectively.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Bare attribute access: raises AttributeError for non-strings.
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        chunks = compat_urllib_parse._asciire.split(string)
        decoded = [chunks[0]]
        # Odd indexes hold the pieces matched by the splitting pattern; the
        # chunk following each one is copied through unchanged.
        for idx in range(1, len(chunks), 2):
            raw = compat_urllib_parse_unquote_to_bytes(chunks[idx])
            decoded.append(raw.decode(encoding, errors))
            decoded.append(chunks[idx + 1])
        return ''.join(decoded)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Turn plus signs into spaces, then unquote (HTML form value decoding).

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(
            string.replace('+', ' '), encoding, errors)
143
8f9312c3
PH
try:
    # Python 2: these three builtins all exist, so bind the text-oriented ones.
    compat_str = unicode
    compat_basestring = basestring
    compat_chr = unichr
except NameError:
    # The very first lookup fails when the Python 2 builtins are absent;
    # fall back to str/chr.
    compat_str = str
    compat_basestring = str
    compat_chr = chr
158
159try:
160 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
161except ImportError: # Python 2.6
162 from xml.parsers.expat import ExpatError as compat_xml_parse_error
163
8c25f81b
PH
164
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string *qs* into a list of decoded (name, value) pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless *keep_blank_values* is true. With *strict_parsing*, a
        field without '=' (including an empty field) raises ValueError.
        """
        fields = []
        for amp_part in qs.split('&'):
            fields.extend(amp_part.split(';'))

        decoded_pairs = []
        for field in fields:
            if not field and not strict_parsing:
                continue
            name_and_value = field.split('=', 1)
            if len(name_and_value) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                name_and_value.append('')
            raw_name, raw_value = name_and_value
            if not (keep_blank_values or len(raw_value)):
                continue
            name = compat_str(compat_urllib_parse_unquote(
                raw_name.replace('+', ' '), encoding=encoding, errors=errors))
            value = compat_str(compat_urllib_parse_unquote(
                raw_value.replace('+', ' '), encoding=encoding, errors=errors))
            decoded_pairs.append((name, value))
        return decoded_pairs

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to a list of values.

        Values of a repeated name are collected in order of appearance.
        """
        grouped = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            grouped.setdefault(name, []).append(value)
        return grouped
211
8c25f81b
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return *s* quoted so it can be used as a single shell token.

        Strings made only of word characters, '-', '.', and '/' are returned
        unchanged. Anything else (including the empty string) is wrapped in
        single quotes, with embedded single quotes spliced in as '"'"'.
        """
        # Anchor with \Z rather than $: `$` also matches just before a
        # trailing newline, which would let 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
8c25f81b
PH
220
221
def compat_ord(c):
    """Return *c* itself when it is a plain int, otherwise ord(c).

    The check is on the exact type, so int subclasses are passed to ord().
    """
    return c if type(c) is int else ord(c)
8c25f81b
PH
227
228
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable *key* decoded with the filesystem encoding.

        Falsy results (an unset variable with a falsy *default*, or an empty
        value) are returned as-is without decoding.
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i is the index just past the "~" / "~user" prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home directory.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the named user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i is the index just past the "~" / "~user" prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() returns None for an unset variable instead
                # of raising KeyError, so substitute the empty drive here
                # rather than in an exception handler.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
303
304
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string *s*; asserts that it is a compat_str."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print *s* encoded with the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
313
314
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Arguments are forwarded to subprocess.Popen; 'input' is not
        supported. Raises subprocess.CalledProcessError, with the captured
        stdout in its ``output`` attribute, when the exit status is non-zero.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # This fallback only runs on interpreters lacking check_output,
            # where Popen has no `.args` attribute and CalledProcessError
            # takes no `output` keyword. Recover the command from the call
            # arguments and attach the output by hand.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
326
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        """Call getpass.getpass, first encoding a text *prompt* to bytes.

        A compat_str prompt is encoded with the preferred encoding; any
        other prompt and all remaining arguments are passed through as-is.
        """
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
335
c7b0add8
PH
# Old 2.6 and 2.7 releases require kwargs to be bytes
def _testfunc(x):
    pass


try:
    # Probe: does ** expansion accept the keys of a plain dict literal here?
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* whose keys are converted with bytes()."""
        converted = {}
        for key, value in kwargs.items():
            converted[bytes(key)] = value
        return converted
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged (the very same object)."""
        return kwargs
8c25f81b 346
e07e9313 347
be4a824d
PH
if sys.version_info < (2, 7):
    def compat_socket_create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                                        source_address=None):
        """Connect to *address* (a (host, port) pair) and return the socket.

        Each result of getaddrinfo() is tried in turn. *timeout* is applied
        to the socket unless it is the socket module's "use the global
        default" sentinel, which is also the default here, matching
        socket.create_connection used by the other branch. *source_address*,
        when truthy, is bound before connecting.

        Raises socket.error: the error from the last failed attempt, or a
        generic one when getaddrinfo() returned nothing.
        """
        host, port = address
        err = None
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                # The sentinel is not a valid settimeout() argument; it
                # means "leave the socket on the global default".
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sa)
                return sock
            except socket.error as e:
                # Remember the failure and move on to the next address.
                err = e
                if sock is not None:
                    sock.close()
        if err is not None:
            raise err
        else:
            raise socket.error("getaddrinfo returns an empty list")
else:
    compat_socket_create_connection = socket.create_connection
372
373
e07e9313
PH
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects a text option string.

    A throwaway parser and group are used to probe add_option('-t'). Only if
    that raises TypeError is the method replaced, by a wrapper that
    ASCII-encodes every compat_str positional and keyword value before
    delegating to the original method.
    """
    probe_parser = optparse.OptionParser()
    probe_group = optparse.OptionGroup(probe_parser, 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        original_add_option = optparse.OptionGroup.add_option

        def _encode(value):
            # Only text values are converted; everything else passes through.
            if isinstance(value, compat_str):
                return value.encode('ascii', 'replace')
            return value

        def _compat_add_option(self, *args, **kwargs):
            encoded_args = [_encode(arg) for arg in args]
            encoded_kwargs = dict(
                (key, _encode(value)) for key, value in kwargs.items())
            return original_add_option(self, *encoded_args, **encoded_kwargs)

        optparse.OptionGroup.add_option = _compat_add_option
393
003c69a8
JMF
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return environment variable *name* as a positive int, else None."""
        raw = compat_getenv(name, None)
        if not raw:
            return None
        try:
            value = int(raw)
        except ValueError:
            # A non-numeric COLUMNS/LINES is treated as unset, not fatal.
            return None
        return value if value > 0 else None

    def compat_get_terminal_size():
        """Return the terminal size as a (columns, lines) named tuple.

        COLUMNS and LINES from the environment take precedence. `stty size`
        is consulted only for a dimension the environment did not provide.
        A dimension that cannot be determined is None.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                stty_lines, stty_columns = map(int, out.split())
            except Exception:
                # Best effort: no stty, no tty, or unparsable output.
                pass
            else:
                if columns is None:
                    columns = stty_columns
                if lines is None:
                    lines = stty_lines
        return _terminal_size(columns, lines)
420
a0e060ac
YCH
try:
    # Probe whether itertools.count accepts the start/step keywords.
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        current = start
        while True:
            yield current
            current += step
else:
    compat_itertools_count = itertools.count
e07e9313 430
8c25f81b
PH
431__all__ = [
432 'compat_HTTPError',
0196149c 433 'compat_basestring',
8c25f81b
PH
434 'compat_chr',
435 'compat_cookiejar',
436 'compat_expanduser',
003c69a8 437 'compat_get_terminal_size',
8c25f81b
PH
438 'compat_getenv',
439 'compat_getpass',
440 'compat_html_entities',
8c25f81b 441 'compat_http_client',
83fda3c0 442 'compat_http_server',
a0e060ac 443 'compat_itertools_count',
c7b0add8 444 'compat_kwargs',
8c25f81b
PH
445 'compat_ord',
446 'compat_parse_qs',
447 'compat_print',
be4a824d 448 'compat_socket_create_connection',
987493ae 449 'compat_str',
8c25f81b
PH
450 'compat_subprocess_get_DEVNULL',
451 'compat_urllib_error',
452 'compat_urllib_parse',
453 'compat_urllib_parse_unquote',
aa99aa4e 454 'compat_urllib_parse_unquote_plus',
9fefc886 455 'compat_urllib_parse_unquote_to_bytes',
8c25f81b
PH
456 'compat_urllib_parse_urlparse',
457 'compat_urllib_request',
458 'compat_urlparse',
459 'compat_urlretrieve',
460 'compat_xml_parse_error',
461 'shlex_quote',
462 'subprocess_check_output',
e07e9313 463 'workaround_optparse_bug9161',
8c25f81b 464]