]> jfr.im git - yt-dlp.git/blame - youtube_dl/compat.py
[youporn] Extract comment count
[yt-dlp.git] / youtube_dl / compat.py
CommitLineData
451948b2
PH
1from __future__ import unicode_literals
2
0a67a363 3import binascii
003c69a8 4import collections
0a67a363 5import email
8c25f81b 6import getpass
0a67a363 7import io
e07e9313 8import optparse
8c25f81b 9import os
7d4111ed 10import re
51f579b6 11import shlex
003c69a8 12import shutil
be4a824d 13import socket
8c25f81b
PH
14import subprocess
15import sys
a0e060ac 16import itertools
8c25f81b
PH
17
18
19try:
20 import urllib.request as compat_urllib_request
5f6a1245 21except ImportError: # Python 2
8c25f81b
PH
22 import urllib2 as compat_urllib_request
23
24try:
25 import urllib.error as compat_urllib_error
5f6a1245 26except ImportError: # Python 2
8c25f81b
PH
27 import urllib2 as compat_urllib_error
28
29try:
30 import urllib.parse as compat_urllib_parse
5f6a1245 31except ImportError: # Python 2
8c25f81b
PH
32 import urllib as compat_urllib_parse
33
34try:
35 from urllib.parse import urlparse as compat_urllib_parse_urlparse
5f6a1245 36except ImportError: # Python 2
8c25f81b
PH
37 from urlparse import urlparse as compat_urllib_parse_urlparse
38
39try:
40 import urllib.parse as compat_urlparse
5f6a1245 41except ImportError: # Python 2
8c25f81b
PH
42 import urlparse as compat_urlparse
43
0a67a363
YCH
44try:
45 import urllib.response as compat_urllib_response
46except ImportError: # Python 2
47 import urllib as compat_urllib_response
48
8c25f81b
PH
49try:
50 import http.cookiejar as compat_cookiejar
5f6a1245 51except ImportError: # Python 2
8c25f81b
PH
52 import cookielib as compat_cookiejar
53
799207e8 54try:
55 import http.cookies as compat_cookies
56except ImportError: # Python 2
57 import Cookie as compat_cookies
58
8c25f81b
PH
59try:
60 import html.entities as compat_html_entities
5f6a1245 61except ImportError: # Python 2
8c25f81b
PH
62 import htmlentitydefs as compat_html_entities
63
8c25f81b
PH
64try:
65 import http.client as compat_http_client
5f6a1245 66except ImportError: # Python 2
8c25f81b
PH
67 import httplib as compat_http_client
68
69try:
70 from urllib.error import HTTPError as compat_HTTPError
71except ImportError: # Python 2
72 from urllib2 import HTTPError as compat_HTTPError
73
74try:
75 from urllib.request import urlretrieve as compat_urlretrieve
76except ImportError: # Python 2
77 from urllib import urlretrieve as compat_urlretrieve
78
79
try:
    from subprocess import DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a newly opened, writable handle on the null device."""
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the ``subprocess.DEVNULL`` sentinel."""
        return DEVNULL
85
83fda3c0
PH
86try:
87 import http.server as compat_http_server
88except ImportError:
89 import BaseHTTPServer as compat_http_server
90
953fed28
PH
# Text type alias: ``unicode`` where that builtin exists, ``str`` otherwise.
try:
    unicode
except NameError:
    compat_str = str
else:  # Python 2
    compat_str = unicode
95
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Reuse the pattern shipped with the stdlib module when it has one.
    try:
        _asciire = compat_urllib_parse._asciire
    except AttributeError:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Text input is encoded as UTF-8 first; that only matters when it
        # holds unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Attribute access only: fails for objects that are not string-like.
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        pieces = string.split(b'%')
        if len(pieces) == 1:
            # No escape at all, hand the input back untouched.
            return string
        out = [pieces[0]]
        for chunk in pieces[1:]:
            try:
                byte = compat_urllib_parse._hextochr[chunk[:2]]
            except KeyError:
                # Not a valid %xx escape: keep the text literally.
                out.extend((b'%', chunk))
            else:
                out.extend((byte, chunk[2:]))
        return b''.join(out)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by the characters they stand for.

        *encoding* and *errors* are passed to bytes.decode() when turning the
        percent-encoded sequences into text; None selects the defaults
        ('utf-8' and 'replace').

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # The split alternates: non-ASCII text, ASCII run, non-ASCII text, ...
        pieces = _asciire.split(string)
        out = [pieces[0]]
        for ascii_run, other in zip(pieces[1::2], pieces[2::2]):
            out.append(
                compat_urllib_parse_unquote_to_bytes(ascii_run).decode(encoding, errors))
            out.append(other)
        return ''.join(out)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but plus signs are turned into spaces first, as
        needed for HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        return compat_urllib_parse_unquote(
            string.replace('+', ' '), encoding, errors)
165
0a67a363
YCH
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """Handler that opens ``data:`` URLs as specified in RFC 2397."""

        def data_open(self, req):
            """Decode the payload embedded in the URL of *req*.

            POSTed data is ignored.  Accepted syntax:

                dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
                mediatype := [ type "/" subtype ] *( ";" parameter )
                data      := *urlchar
                parameter := attribute "=" value
            """
            full_url = req.get_full_url()

            _, remainder = full_url.split(":", 1)
            mediatype, payload = remainder.split(",", 1)

            # Even base64 encoded data URLs might be quoted, so always unquote.
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            base64_marker = ";base64"
            if mediatype.endswith(base64_marker):
                payload = binascii.a2b_base64(payload)
                mediatype = mediatype[:-len(base64_marker)]

            mediatype = mediatype or "text/plain;charset=US-ASCII"

            raw_headers = "Content-type: %s\nContent-length: %d\n" % (
                mediatype, len(payload))
            headers = email.message_from_string(raw_headers)

            return compat_urllib_response.addinfourl(
                io.BytesIO(payload), headers, full_url)
199
# Base string type alias: ``basestring`` where that builtin exists.
try:
    basestring
except NameError:
    compat_basestring = str
else:  # Python 2
    compat_basestring = basestring

# Code point -> character function: ``unichr`` where that builtin exists.
try:
    unichr
except NameError:
    compat_chr = chr
else:  # Python 2
    compat_chr = unichr
209
210try:
211 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
212except ImportError: # Python 2.6
213 from xml.parsers.expat import ExpatError as compat_xml_parse_error
214
8c25f81b
PH
215
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string *qs* into a list of decoded (name, value) pairs.

        Fields are separated by '&' or ';'.  Raises ValueError for a field
        without '=' when *strict_parsing* is set.  Fields with an empty value
        are dropped unless *keep_blank_values* is set.
        """
        def _decode(component):
            # '+' stands for a space in form-encoded data.
            text = compat_urllib_parse_unquote(
                component.replace('+', ' '), encoding=encoding, errors=errors)
            return compat_str(text)

        result = []
        for amp_part in qs.split('&'):
            for field in amp_part.split(';'):
                if not (field or strict_parsing):
                    continue
                name, sep, value = field.partition('=')
                if not sep:
                    if strict_parsing:
                        raise ValueError("bad query field: %r" % (field,))
                    # A control name without an equal sign counts as blank.
                    if not keep_blank_values:
                        continue
                if value or keep_blank_values:
                    result.append((_decode(name), _decode(value)))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to the list
        of its values, in order of appearance."""
        grouped = {}
        decoded_pairs = _parse_qsl(
            qs, keep_blank_values, strict_parsing,
            encoding=encoding, errors=errors)
        for name, value in decoded_pairs:
            grouped.setdefault(name, []).append(value)
        return grouped
262
8c25f81b
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return *s* quoted so a POSIX shell reads it as one literal word.

        Strings made only of word characters, '-', '_', '.' and '/' are
        returned unchanged; anything else (including the empty string) is
        wrapped in single quotes, with embedded single quotes escaped.
        """
        # \Z instead of $: "$" also matches just before a trailing newline,
        # which would let a string such as 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        return "'" + s.replace("'", "'\"'\"'") + "'"
8c25f81b
PH
271
272
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """Split *s* like shlex.split(), encoding text input to UTF-8 first."""
        raw = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(raw, comments, posix)
else:
    compat_shlex_split = shlex.split
282
283
def compat_ord(c):
    """Return *c* itself when it is a plain int, otherwise ``ord(c)``."""
    # Exact type comparison: instances of int subclasses still go to ord().
    return c if type(c) is int else ord(c)
8c25f81b
PH
289
290
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable *key* decoded with the filesystem
        encoding.

        A missing variable yields *default*; falsy results (None, '') are
        returned as they are, without decoding.
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i marks the end of the "~" / "~user" prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home, from $HOME or the passwd db.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the named user up in the passwd db.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i marks the end of the "~" / "~user" prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() never raises KeyError; without an explicit
                # default an unset HOMEDRIVE would come back as None and
                # break os.path.join() below.
                drive = compat_getenv('HOMEDRIVE', '')
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
365
366
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string *s*."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print *s* encoded with the preferred encoding; characters that
        cannot be encoded are written as XML character references."""
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
375
376
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Arguments are forwarded to subprocess.Popen (``input`` is not
        supported).  Raises subprocess.CalledProcessError, carrying the
        captured output in its ``output`` attribute, when the command exits
        with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Interpreters without check_output have neither Popen.args nor
            # the ``output`` keyword of CalledProcessError, so recover the
            # command from our own arguments and attach the output by hand.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
388
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() wrapper that encodes a text *prompt* with the
        preferred encoding before passing it on."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
397
c7b0add8
PH
# Old 2.6 and 2.7 releases require kwargs to be bytes
def _testfunc(x):
    pass


try:
    # Probe: does this interpreter accept text keys in a ** mapping?
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* whose keys are converted with bytes()."""
        converted = {}
        for key, value in kwargs.items():
            converted[bytes(key)] = value
        return converted
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged."""
        return kwargs
8c25f81b 408
e07e9313 409
be4a824d
PH
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to *address* (a ``(host, port)`` pair) and return the socket.

        Every address returned by getaddrinfo() is tried in turn.  If none
        can be connected, the last socket.error is re-raised; socket.error is
        also raised when getaddrinfo() yields nothing.
        """
        host, port = address
        last_error = None
        candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, kind, proto, _canonname, sockaddr in candidates:
            conn = None
            try:
                conn = socket.socket(family, kind, proto)
                conn.settimeout(timeout)
                if source_address:
                    conn.bind(source_address)
                conn.connect(sockaddr)
                return conn
            except socket.error as exc:
                last_error = exc
                # Do not leak the half-set-up socket before trying the next one.
                if conn is not None:
                    conn.close()
        if last_error is None:
            raise socket.error("getaddrinfo returns an empty list")
        raise last_error
434
435
e07e9313
PH
436# Fix https://github.com/rg3/youtube-dl/issues/4223
437# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option when it rejects text arguments.

    A throwaway option group is probed with add_option('-t').  Only if that
    raises TypeError is add_option replaced by a wrapper that ASCII-encodes
    text positional and keyword argument values before delegating to the
    original method.
    """
    probe_group = optparse.OptionGroup(optparse.OptionParser(), 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        pass
    else:
        # This interpreter is not affected; leave optparse alone.
        return

    original_add_option = optparse.OptionGroup.add_option

    def _to_bytes(value):
        if isinstance(value, compat_str):
            return value.encode('ascii', 'replace')
        return value

    def _compat_add_option(self, *args, **kwargs):
        byte_args = [_to_bytes(arg) for arg in args]
        byte_kwargs = dict(
            (key, _to_bytes(val)) for key, val in kwargs.items())
        return original_add_option(self, *byte_args, **byte_kwargs)

    optparse.OptionGroup.add_option = _compat_add_option
455
003c69a8
JMF
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return environment variable *name* as an int, or None when it is
        unset, empty or not a valid integer."""
        raw = compat_getenv(name)
        if not raw:
            return None
        try:
            return int(raw)
        except ValueError:
            # A garbage value is treated like an unset variable instead of
            # crashing the caller.
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        Positive values of the COLUMNS and LINES environment variables take
        precedence.  Any dimension still missing is taken from ``stty size``,
        or from *fallback* when that command cannot be run or parsed.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<lines> <columns>".
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
488
a0e060ac
YCH
try:
    # Probe whether itertools.count() accepts the start/step arguments.
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        value = start
        while True:
            yield value
            value += step
else:
    compat_itertools_count = itertools.count
e07e9313 498
67134eab
JMF
499if sys.version_info >= (3, 0):
500 from tokenize import tokenize as compat_tokenize_tokenize
501else:
502 from tokenize import generate_tokens as compat_tokenize_tokenize
e07e9313 503
8c25f81b
PH
504__all__ = [
505 'compat_HTTPError',
0196149c 506 'compat_basestring',
8c25f81b
PH
507 'compat_chr',
508 'compat_cookiejar',
799207e8 509 'compat_cookies',
8c25f81b 510 'compat_expanduser',
003c69a8 511 'compat_get_terminal_size',
8c25f81b
PH
512 'compat_getenv',
513 'compat_getpass',
514 'compat_html_entities',
8c25f81b 515 'compat_http_client',
83fda3c0 516 'compat_http_server',
a0e060ac 517 'compat_itertools_count',
c7b0add8 518 'compat_kwargs',
8c25f81b
PH
519 'compat_ord',
520 'compat_parse_qs',
521 'compat_print',
51f579b6 522 'compat_shlex_split',
be4a824d 523 'compat_socket_create_connection',
987493ae 524 'compat_str',
8c25f81b 525 'compat_subprocess_get_DEVNULL',
67134eab 526 'compat_tokenize_tokenize',
8c25f81b
PH
527 'compat_urllib_error',
528 'compat_urllib_parse',
529 'compat_urllib_parse_unquote',
aa99aa4e 530 'compat_urllib_parse_unquote_plus',
9fefc886 531 'compat_urllib_parse_unquote_to_bytes',
8c25f81b
PH
532 'compat_urllib_parse_urlparse',
533 'compat_urllib_request',
0a67a363
YCH
534 'compat_urllib_request_DataHandler',
535 'compat_urllib_response',
8c25f81b
PH
536 'compat_urlparse',
537 'compat_urlretrieve',
538 'compat_xml_parse_error',
539 'shlex_quote',
540 'subprocess_check_output',
e07e9313 541 'workaround_optparse_bug9161',
8c25f81b 542]