]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[utils] Add sanitize_path
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
912b38b4 6import calendar
676eb3f2 7import codecs
62e609ab 8import contextlib
e3946f98 9import ctypes
c496ca96
PH
10import datetime
11import email.utils
f45c185f 12import errno
be4a824d 13import functools
d77c3dfd 14import gzip
b7ab0590 15import itertools
03f9daab 16import io
f4bfd65f 17import json
d77c3dfd 18import locale
02dbf93f 19import math
347de493 20import operator
d77c3dfd 21import os
4eb7f1d1 22import pipes
c496ca96 23import platform
d77c3dfd 24import re
13ebea79 25import ssl
c496ca96 26import socket
b53466e1 27import struct
1c088fa8 28import subprocess
d77c3dfd 29import sys
181c8655 30import tempfile
01951dda 31import traceback
bcf89ce6 32import xml.etree.ElementTree
d77c3dfd 33import zlib
d77c3dfd 34
8c25f81b 35from .compat import (
8f9312c3 36 compat_basestring,
8c25f81b 37 compat_chr,
8c25f81b 38 compat_html_entities,
be4a824d 39 compat_http_client,
8c25f81b 40 compat_parse_qs,
be4a824d 41 compat_socket_create_connection,
8c25f81b
PH
42 compat_str,
43 compat_urllib_error,
44 compat_urllib_parse,
45 compat_urllib_parse_urlparse,
46 compat_urllib_request,
47 compat_urlparse,
7d4111ed 48 shlex_quote,
8c25f81b 49)
4644ac55
S
50
51
468e2e92
FV
# The class of compiled patterns has no public name on every supported
# Python version, so it is taken from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers; YoutubeDLHandler.http_request adds each of
# them to a request that does not already carry it.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
67
68
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or the reported encoding cannot actually encode text,
    'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec: the locale may name an encoding Python lacks.
        'TEST'.encode(pref)
    except Exception:
        # Only real errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        pref = 'UTF-8'

    return pref
d77c3dfd 82
f4bfd65f 83
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn
    (named "<basename>.<random>.tmp") and then renamed over fn. If anything
    fails, the temporary file is removed and the error is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except:  # noqa: E722 - deliberate catch-all: clean up, then re-raise
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
136
137
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the expression, so only a
        # conservative character set is accepted for each.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element under xpath whose attribute key equals val """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        candidates = (el for el in node.findall(xpath) if el.attrib.get(key) == val)
        return next(candidates, None)
156
d7e66d39
JMF
157# On python2.6 the xml.etree.ElementTree.Element methods don't support
158# the namespace parameter
5f6a1245
JW
159
160
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand "prefix:tag" steps of path into "{namespace-uri}tag" form.

    ns_map maps each prefix used in path to its namespace URI. Steps
    without a prefix are kept unchanged.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
171
d77c3dfd 172
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element matching xpath under node.

    If no element matches, or the match has no text, None is returned;
    with fatal set, an ExtractorError naming the element (name, or the
    xpath itself when name is None) is raised instead.
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None and found.text is not None:
        return found.text

    if not fatal:
        return None
    if name is None:
        name = xpath
    raise ExtractorError('Could not find XML element %s' % name)
185
186
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals the given ID in the passed HTML document"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
190
12ea2f30 191
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document

    The first tag carrying attribute=value is located with a regular
    expression and its inner content is returned with HTML entities
    unescaped. Returns None when no such tag is found.
    """

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    match = re.search(pattern, html)
    if match is None:
        return None

    content = match.group('content')
    # Content that begins with a quote character loses its first and last
    # characters.
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 213
9e6dd238
FV
214
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.: None passes through.
    if html is None:
        return html

    # Literal newlines carry no meaning in HTML; line breaks come from
    # <br> tags and paragraph boundaries instead.
    text = html.replace('\n', ' ')
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip all remaining html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Replace html entities, then trim surrounding whitespace
    return unescapeHTML(text).strip()
9e6dd238
FV
230
231
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly (replacing characters that are forbidden on win32
    with '#') and opens that name instead; if that fails too, the exception
    is raised like from the standard open() function.

    The filename '-' means standard output (switched to binary mode on
    win32).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # The parts must be unpacked: os.path.join takes the components as
        # separate arguments, not as a single iterable.
        alt_filename = os.path.join(*[
            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
            for path_part in os.path.split(filename)])
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
265
266
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 274
5f6a1245 275
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            # Shell-unfriendly punctuation, whitespace and non-ASCII
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    # Handle timestamps: "0:12:34" becomes "0_12_34" rather than "0 -12 -34"
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        # IDs only get the per-character replacement above
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_{2,}', '_', result)
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    # A leading dash is replaced by an underscore
    if result.startswith('-'):
        result = '_' + result[1:]
    # Leading dots are removed
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 312
5f6a1245 313
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform the path is returned unchanged. On Windows the
    path is normalized and, in every component after the drive or UNC
    prefix, characters that are not allowed in file names are replaced
    with '#'.
    """
    if sys.platform != 'win32':
        return s
    drive, _ = os.path.splitdrive(s)
    # os.path.splitunc was deprecated in Python 3.1 and removed in 3.7;
    # only consult it where it still exists.
    unc = ''
    if hasattr(os.path, 'splitunc'):
        unc, _ = os.path.splitunc(s)
    unc_or_drive = unc or drive
    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
    if unc_or_drive:
        # Drop the empty component left by the separator after the prefix
        norm_path.pop(0)
    sanitized_path = [
        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
        for path_part in norm_path]
    if unc_or_drive:
        sanitized_path.insert(0, unc_or_drive + os.path.sep)
    return os.path.join(*sanitized_path)
330
331
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element, in
    the original order. Elements are compared by equality, so unhashable
    elements are accepted.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 339
912b38b4 340
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and decimal
    ("#38") or hexadecimal ("#x2F") character references are converted;
    anything else is returned in its literal "&...;" form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hexadecimal references may contain the digits a-f in either case
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the "0x" prefix for base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
359
360
def unescapeHTML(s):
    """Replace every "&...;" entity in s with the character it stands for.

    None is passed through unchanged; any other input must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
d77c3dfd 368
8bf48f23
PH
369
def encodeFilename(s, for_subprocess=False):
    """
    Return the file name in the form expected by file system (or, with
    for_subprocess, subprocess) calls on this Python version and platform.

    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up offer Unicode APIs.
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        # Pass the unicode string directly to use the Unicode APIs
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
396
f07b74fc
PH
397
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename(..., for_subprocess=True).

    Byte strings passed by legacy code are decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
405
406
8271226a
PH
def decodeOption(optval):
    """Return an option value as text.

    None is passed through; bytes are decoded with the preferred encoding.
    """
    if optval is None:
        return optval

    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 415
5f6a1245 416
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as "S", "M:SS" or "H:MM:SS".

    The longer forms are used only for values strictly greater than 60
    and 3600 seconds respectively.
    """
    if secs > 3600:
        minutes, seconds = divmod(secs, 60)
        hours, minutes = divmod(minutes, 60)
        return '%d:%02d:%02d' % (hours, minutes, seconds)
    if secs > 60:
        return '%d:%02d' % divmod(secs, 60)
    return '%d' % secs
424
a0ddb8a2 425
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are passed on to the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        default_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            default_context.check_hostname = False
            default_context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=default_context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 449
732ea2f0 450
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is appended to the message; video_id, if given, is prepended to it.
        """

        # An error raised while one of these exceptions is being handled
        # is treated as expected.
        expected_while_handling = (
            compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_while_handling:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get instructions for reporting a bug
            update_cmd = (
                'type youtube-dl -U to update'
                if ytdl_is_updateable()
                else 'see https://yt-dl.org/update on how to update')
            msg += '; please report this issue on https://yt-dl.org/bug .'
            msg += ' Make sure you are using the latest version; %s.' % update_cmd
            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        formatted = traceback.format_tb(self.traceback)
        return ''.join(formatted)
01951dda 484
1c256f70 485
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error raised for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
491
492
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
496
497
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
510
511
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
519
520
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # The message is kept on the instance as the msg attribute.
        self.msg = msg
d77c3dfd 530
5f6a1245 531
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
535
536
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
544
545
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Class-level defaults; both values are byte counts
    downloaded = None  # bytes actually received
    expected = None  # bytes announced by the server

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 560
5f6a1245 561
c5a59d93 562def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
be4a824d
PH
563 hc = http_class(*args, **kwargs)
564 source_address = ydl_handler._params.get('source_address')
565 if source_address is not None:
566 sa = (source_address, 0)
567 if hasattr(hc, 'source_address'): # Python 2.7+
568 hc.source_address = sa
569 else: # Python 2.6
570 def _hc_connect(self, *args, **kwargs):
571 sock = compat_socket_create_connection(
572 (self.host, self.port), self.timeout, sa)
573 if is_https:
d7932313
PH
574 self.sock = ssl.wrap_socket(
575 sock, self.key_file, self.cert_file,
576 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
577 else:
578 self.sock = sock
579 hc.connect = functools.partial(_hc_connect, hc)
580
581 return hc
582
583
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # Read by _create_http_connection (e.g. for 'source_address')
        self._params = params

    def http_open(self, req):
        """Open req over a connection created by _create_http_connection."""
        connection_factory = functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and a zlib-wrapped one second."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the given status code."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        # No getcode(): the constructor takes no code, so set it afterwards
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    def http_request(self, req):
        """Add the standard headers and apply the no-compression marker."""
        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Return resp with gzip or deflate content encoding undone."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes removed
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic gets the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 675
5de90176 676
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler whose connections are created by _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        # Connection class to instantiate; defaults to the stock HTTPSConnection
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, forwarding the SSL settings the base handler stored, if any."""
        extra = {}
        if hasattr(self, '_context'):  # python > 2.6
            extra['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra['check_hostname'] = self._check_hostname
        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **extra)
be4a824d
PH
692
693
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str is "YYYY-MM-DD<delimiter>HH:MM:SS", optionally followed by
    fractional seconds and "Z" or a "+HH:MM"/"-HHMM" style offset. When
    timezone (a timedelta) is given, no suffix is parsed and that offset is
    used. None is returned for a None date_str.
    """

    if date_str is None:
        return None

    if timezone is None:
        # UTC unless an explicit offset is found below
        timezone = datetime.timedelta()
        suffix = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if suffix:
            # Cut fractional seconds and zone designator off the date
            date_str = date_str[:-len(suffix.group(0))]
            if suffix.group('sign'):
                direction = 1 if suffix.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(suffix.group('hours')),
                    minutes=direction * int(suffix.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    local_time = datetime.datetime.strptime(date_str, date_format)
    utc_time = local_time - timezone
    return calendar.timegm(utc_time.timetuple())
718
719
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    A list of known formats is tried, followed by RFC 2822 parsing.
    day_first selects how ambiguous numeric dates such as 01/02/2014 are
    read. Returns None for None input or when nothing matches.
    """

    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    common_formats = (
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    )
    if day_first:
        ambiguous_formats = (
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        )
    else:
        ambiguous_formats = (
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        )

    upload_date = None
    # Every format is tried; a later match overrides an earlier one.
    for expression in common_formats + ambiguous_formats:
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        upload_date = parsed.strftime('%Y%m%d')

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
780
5f6a1245 781
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    The guess is whatever follows the last dot before any '?'. It is
    returned only if it is purely alphanumeric; otherwise (or when url is
    None) default_ext is returned.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if not re.match(r'^[A-Za-z0-9]+$', candidate):
        return default_ext
    return candidate
73e79f2a 790
5f6a1245 791
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by "<sub_lang>.<sub_format>"."""
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 794
5f6a1245 795
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'now', 'today' and 'yesterday' are accepted on their own. Months and
    years are approximated as 30 and 365 days. A string in neither form
    makes strptime raise ValueError.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a normal string literal is an invalid escape
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keyword names ('days', 'weeks')
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
823
824
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
833
5f6a1245 834
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest / latest representable
        date. ValueError is raised when start lies after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(start=day, end=day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Strings are converted first; date objects are used as they are
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
864
865
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A bytes result is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
874
875
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # AttributeError: the output stream doesn't have a fileno, it's virtual
        # UnsupportedOperation: some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    kernel32 = ctypes.windll.kernel32

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", kernel32))
    handle = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(candidate):
        if candidate == INVALID_HANDLE_VALUE or candidate is None:
            return True
        # Must be a character device for which GetConsoleMode succeeds
        return ((GetFileType(candidate) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(candidate, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(handle):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text)
        return next((i for i, c in enumerate(text) if ord(c) > 0xffff), len(text))

    while s:
        # Write at most 1024 characters, stopping before a non-BMP one
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            handle, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
949
950
def write_string(s, out=None, encoding=None):
    """ Write the text string s to out and flush it.

    out defaults to sys.stderr. On Windows, when no encoding is given,
    the console API is tried first. Otherwise the string is encoded (with
    undecodable characters ignored) for binary streams, Python 2 and
    streams exposing a .buffer, and written as text for anything else. """
    if out is None:
        out = sys.stderr
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses, like the other compat_str checks in this module
    assert isinstance(s, compat_str)

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
971
972
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string into a list of integer byte values """
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Indexing yields one-character strings (Python 2 str)
        return [ord(c) for c in bs]
    return list(bs)  # Python 3 bytes already iterate as ints
980
c257baff 981
def intlist_to_bytes(xs):
    """ Pack a list of integer byte values into a byte string """
    return struct_pack('%dB' % len(xs), *xs) if xs else b''
c38b1e77
PH
986
987
c1c9a79c
PH
# Cross-platform file locking: both branches define
# _lock_file(f, exclusive) and _unlock_file(f)
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL

    # Byte range (low/high DWORD) used for both locking and unlocking
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the file object f; raises OSError on failure """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release the lock taken by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ Take an exclusive or shared flock on the file object f """
        operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
        fcntl.flock(f, operation)

    def _unlock_file(f):
        """ Release the flock held on the file object f """
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1051
1052
class locked_file(object):
    """ Context manager around io.open that holds a file lock.

    The lock is exclusive unless the file was opened with mode 'r'.
    It is taken on entering the with block; on leaving it the lock is
    released and the file closed. """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the open file when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        """ Delegate to the write method of the underlying file """
        return self.f.write(*args)

    def read(self, *args):
        """ Delegate to the read method of the underlying file """
        return self.f.read(*args)
4eb7f1d1
JMF
1082
1083
4644ac55
S
def get_filesystem_encoding():
    """ Return sys.getfilesystemencoding(), or 'utf-8' if it is None """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1087
1088
def shell_quote(args):
    """ Return the arguments quoted for a shell and joined by spaces.

    Byte string arguments (e.g. filenames encoded with 'encodeFilename')
    are decoded with the filesystem encoding first. """
    encoding = get_filesystem_encoding()
    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote (from .compat) instead of pipes.quote: the pipes
        # module is deprecated and no longer exists in Python 3.13;
        # args_to_str already quotes with shlex_quote
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1098
1099
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yield the element that ends
    the run (the first element e for which pred(e) is false) """
    for e in seq:
        yield e
        if pred(e):
            continue
        break
1107
1108
9d4660ca
PH
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded and appended to url as a URL-encoded fragment
    under the key '__youtubedl_smuggle'. """
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    sdata = compat_urllib_parse.urlencode(payload)
    return url + '#' + sdata
9d4660ca
PH
1115
1116
def unsmuggle_url(smug_url, default=None):
    """ Split a URL produced by smuggle_url into (url, data).

    URLs without smuggled data are returned unchanged together
    with default. """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(jsond)
    return smug_url, default
02dbf93f
PH
1124
1125
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary unit suffix, e.g. '1.50KiB'.

    bytes may be a number, a str holding a number, or None, in which
    case 'N/A' is returned. Values below one byte are reported in 'B'
    and values beyond the largest known unit in 'YiB'. """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available suffixes: fractions of a byte would give a
        # negative index (wrapping around to 'YiB'), huge values an IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 1138
1c088fa8 1139
be64b5b0
PH
def parse_filesize(s):
    """ Parse a string such as '1.5 MiB' or '2,5GB' into a number of bytes.

    Returns an int, or None if s is None or does not start with a number
    followed by a known unit. """
    if s is None:
        return None

    # For every prefix X: 'XiB' and the lower-case 'xB' are powers of 1024,
    # 'XB' and 'Xb' are powers of 1000.
    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1}
    for power, prefix in enumerate('KMGTPEZY', 1):
        binary = 1024 ** power
        decimal = 1000 ** power
        unit_table[prefix + 'iB'] = binary
        unit_table[prefix + 'B'] = decimal
        unit_table[prefix.lower() + 'B'] = binary
        unit_table[prefix + 'b'] = decimal

    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if m is None:
        return None

    # A comma is accepted as the decimal separator
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
be64b5b0
PH
1192
1193
caefb1de
PH
def month_by_name(name):
    """ Return the number (1-12) of a month by (locale-independently)
    English name, or None if the name is not known """
    if name not in ENGLISH_MONTH_NAMES:
        return None
    return ENGLISH_MONTH_NAMES.index(name) + 1
1201
1202
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month by (locale-independently)
    English three-letter abbreviation, or None if it is not known """
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
18258362
JMF
1211
1212
def fix_xml_ampersands(xml_str):
    """ Replace every '&' that does not start one of the five predefined
    XML entities or a short numeric character reference by '&amp;' """
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
e3946f98
PH
1219
1220
def setproctitle(title):
    """ Set the process name through prctl of libc.so.6 (best effort).

    title must be a compat_str. Does nothing when libc.so.6 cannot be
    loaded or does not provide prctl. """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes ctypes allocate one extra
    # item, so the string handed to prctl is NUL-terminated. A buffer of
    # exactly len(title_bytes) items would have no terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1234
1235
def remove_start(s, start):
    """ Return s without the prefix start, or s unchanged if it does not
    begin with start """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1240
1241
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end, or s unchanged if it does not
    finish with end """
    # An empty suffix must be skipped explicitly: s[:-0] is s[:0], which
    # would turn every string into ''
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1246
1247
def url_basename(url):
    """ Return the last component of the path of url, ignoring leading
    and trailing slashes """
    path = compat_urlparse.urlparse(url).path
    components = path.strip('/').split('/')
    return components[-1]
aa94a6d3
PH
1251
1252
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1256
1257
9732d77e 1258def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
1259 if get_attr:
1260 if v is not None:
1261 v = getattr(v, get_attr, None)
9572013d
PH
1262 if v == '':
1263 v = None
9732d77e
PH
1264 return default if v is None else (int(v) * invscale // scale)
1265
9572013d 1266
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted to compat_str, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
1269
9732d77e
PH
1270
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus
    signs are dropped before the conversion. None is passed through. """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
1277
1278
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, multiplied by invscale and divided by scale.

    default is returned when v is None or cannot be converted to
    a float. """
    if v is None:
        return default
    try:
        number = float(v)
    except (ValueError, TypeError):
        # Unparsable input is treated like a missing value
        return default
    return number * invscale / scale
43f775e4
PH
1281
1282
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '3 min', '2h 5m 10.5s' or
    'PT1H2M3S' into a number of seconds.

    Returns None if s is not a string or has no recognized format. """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None

    # Durations given purely in (possibly fractional) minutes or hours
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)

    # Integer components and the number of seconds each unit is worth
    seconds_per_unit = (
        ('secs', 1),
        ('mins_reversed', 60),
        ('mins', 60),
        ('hours', 60 * 60),
        ('hours_reversed', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    res = 0
    for group, seconds in seconds_per_unit:
        value = m.group(group)
        if value:
            res += int(value) * seconds
    # The fractional part (captured with its leading dot) makes it a float
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1327
1328
def prepend_extension(filename, ext):
    """ Insert ext in front of the extension of filename,
    e.g. ('video.mp4', 'temp') -> 'video.temp.mp4' """
    root, original_ext = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(root, ext, original_ext)
d70ad093
PH
1332
1333
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary cannot be started. """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1342
1343
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    The combined stdout/stderr of running exe with args is passed on to
    detect_exe_version together with version_re and unrecognized. """
    try:
        process = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):
        # Decode leniently; non-ASCII bytes are dropped
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
1357
1358
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version string from the output of an executable.

    version_re must capture the version in its first group; by default
    the token following 'version' is used. Returns unrecognized if the
    pattern is not found. """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
1368
1369
class PagedList(object):
    """ Base class of paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1374
9c44d242
PH
1375
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one by one until a page comes back
    short or the requested range has been covered.

    pagefunc(pagenum) returns an iterable with the entries of the
    (0-based) page; pagesize is the number of entries of a full page. """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list """
        pagesize = self._pagesize
        res = []
        for pagenum in itertools.count(start // pagesize):
            firstid = pagenum * pagesize
            nextfirstid = firstid + pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry within this page
            if firstid <= start < nextfirstid:
                startv = start % pagesize
            else:
                startv = 0

            # Offset just past the last wanted entry, if it is on this page
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1417
1418
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known in advance.

    pagefunc(pagenum) returns an iterable with the entries of the
    (0-based) page, pagecount is the total number of pages and pagesize
    the number of entries per page. """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list """
        res = []
        start_page = start // self._pagesize
        # Never go beyond the last existing page, even if end lies past the
        # end of the list; otherwise pagefunc is called for missing pages
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page has leading entries to drop
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested range
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1446
1447
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape sequence (upper-case U followed by
    eight hex digits) in s by the character it denotes """
    decode_escape = codecs.getdecoder('unicode_escape')

    def replace(match):
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', replace, s)
b53466e1 1454
d05cfe06
S
1455
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    # The second argument lists the characters that are left unquoted
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1461
1462
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    # Scheme and network location are kept as they are
    escaped_parts = {
        'path': escape_rfc3986(url_parsed.path),
        'params': escape_rfc3986(url_parsed.params),
        'query': escape_rfc3986(url_parsed.query),
        'fragment': escape_rfc3986(url_parsed.fragment),
    }
    return url_parsed._replace(**escaped_parts).geturl()
1472
# struct_pack / struct_unpack: like struct.pack / struct.unpack, but always
# accepting a text format specification
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        bytes_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(bytes_spec, *args)

    def struct_unpack(spec, *args):
        bytes_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(bytes_spec, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1489
1490
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file, one per line, and close batch_fd.

    Byte lines are decoded as UTF-8. A leading byte order mark and the
    surrounding whitespace are removed; empty lines and lines starting
    with '#', ';' or ']' are skipped. Returns a list of URLs. """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # '\ufeff' is the byte order mark of correctly decoded text,
        # '\xef\xbb\xbf' is what its UTF-8 bytes look like when the file
        # was decoded as latin-1; strip whichever form is present
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1505
1506
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments like urlencode does and return the
    result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1509
1510
0990305d
PH
# etree_iter(node): iterate over the elements of an ElementTree node
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1515
1516
bcf89ce6
PH
def parse_xml(s):
    """ Parse the text string s as XML and return the root element.

    Doctype declarations are ignored. On Python 2, element texts are
    converted to unicode strings. """
    class DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    kwargs = {}
    builder = DoctypeIgnoringBuilder()
    parser = xml.etree.ElementTree.XMLParser(target=builder)
    if sys.version_info >= (2, 7):
        kwargs['parser'] = parser
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None and not isinstance(n.text, compat_str):
                n.text = n.text.decode('utf-8')
    return tree
e68301af
PH
1532
1533
a1a530b0
PH
# Numeric age limit for each US rating label; parse_age_limit falls back to
# this table for values that are not plain numbers
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
1541
1542
146c80e2
S
def parse_age_limit(s):
    """ Parse an age limit such as '18', '16+' or a US rating like 'PG-13'.

    Returns the age as an int, or None if s is None or not recognized. """
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1548
1549
def strip_jsonp(code):
    """ Remove the callback wrapper of a JSONP response, i.e. turn
    'callback({...});' into '{...}'. Text that does not look like JSONP
    is returned unchanged. """
    jsonp_wrapper = re.compile(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$')
    return jsonp_wrapper.sub(r'\1', code)
478c2c61
PH
1553
1554
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal to JSON text.

    Single-quoted strings and bare identifiers are turned into
    double-quoted strings and a trailing comma before ']' is removed. """
    # How escapes inside a single-quoted string translate to a
    # double-quoted one
    requote = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        v = m.group(0)
        # Literals and double-quoted strings are valid JSON already
        if v in ('true', 'false', 'null') or v.startswith('"'):
            return v
        if v.startswith("'"):
            v = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: requote[esc.group(0)],
                v[1:-1])
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop the trailing comma of arrays
    return re.sub(r',(\s*\])', r'\1', res)
1578
1579
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.

    Returns a function that maps a quality id to its index in
    quality_ids, or to -1 if it is not listed. """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
1588
acd69589
PH
1589
# Default output filename template (%-style, with named fields)
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1591
a020a0dc
PH
1592
def limit_length(s, length):
    """ Add ellipses to overly long strings: strings longer than length
    are cut so that, including the final '...', they are length
    characters long. None is passed through. """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ellipses = '...'
    return s[:length - len(ellipses)] + ellipses
48844745
PH
1601
1602
def version_tuple(v):
    """ Split a version string at '.' and '-' into a tuple of ints;
    raises ValueError if a component is not an integer """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
1605
1606
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    An empty or unparsable version counts as up to date if assume_new
    is true and as outdated otherwise. """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
732ea2f0
PH
1614
1615
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U, i.e. whether this
    module was loaded by a zipimporter or sys has a 'frozen' attribute """
    from zipimport import zipimporter

    loader = globals().get('__loader__')
    return isinstance(loader, zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
1621
1622
def args_to_str(args):
    """ Get a short string representation for a subprocess command:
    the shell-quoted arguments joined by spaces """
    quoted = [shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
1626
1627
c460bdd5
PH
def mimetype2ext(mt):
    """ Guess a file extension from a MIME type such as 'video/mp4'.

    Parameters after ';' are ignored. The subtype is returned as
    extension unless a better known one is listed below. Returns None
    if mt is None (e.g. a missing Content-Type header). """
    if mt is None:
        return None

    # Drop parameters such as '; charset=utf-8' or '; codecs="..."' so
    # they end up neither in the extension nor in the subtype lookup
    mt = mt.partition(';')[0].strip()
    _, _, res = mt.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)
1635
1636
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension of a response: from the filename of a
    Content-Disposition attachment header if there is one, otherwise
    from the Content-Type header """
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(h):
            return url_handle.headers[h]

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1653
1654
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1663
1664
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    without one). The result is truthy (a match object) if the first
    non-whitespace character is '<', and None otherwise. """
    # The UTF-32 marks come before the UTF-16 ones they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    bom, enc = next(
        (entry for entry in BOMS if first_bytes.startswith(entry[0])),
        (b'', 'utf-8'))
    s = first_bytes[len(bom):].decode(enc, 'replace')

    return re.match(r'^\s*<', s)
a055469f
PH
1683
1684
def determine_protocol(info_dict):
    """ Return the protocol of a format: its 'protocol' entry if set,
    otherwise a guess from the prefix, the extension or the scheme of
    its 'url' """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Prefix matches, so variants of these schemes map to the base name
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1705
1706
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values.

    All columns but the last are left-aligned and padded to the width
    of their longest value plus one; rows are separated by newlines. """
    table = [header_row] + data
    max_lens = []
    for col in zip(*table):
        max_lens.append(max(len(compat_str(v)) for v in col))
    column_formats = [
        '%-' + compat_str(width + 1) + 's' for width in max_lens[:-1]]
    format_str = ' '.join(column_formats) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)
347de493
PH
1713
1714
def _match_one(filter_part, dct):
    """ Evaluate a single condition of a filter string against dct.

    A condition is either a comparison ('key OP value', where a '?' after
    the operator makes a missing key pass) or a presence test ('key' or
    '!key'). Returns a truthy value if dct passes. Raises ValueError for
    invalid conditions. """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op_name = m.group('op')
        op = COMPARISON_OPERATORS[op_name]
        strval = m.group('strval')
        if strval is not None:
            if op_name not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % op_name)
            comparison_value = strval
        else:
            intval = m.group('intval')
            try:
                comparison_value = int(intval)
            except ValueError:
                # Not a plain integer: try a file size, with and without
                # an explicit trailing 'B'
                comparison_value = parse_filesize(intval)
                if comparison_value is None:
                    comparison_value = parse_filesize(intval + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            intval, filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # A missing value only passes if the operator was marked with '?'
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if not m:
        raise ValueError('Invalid filter part %r' % filter_part)
    op = UNARY_OPERATORS[m.group('op')]
    return op(dct.get(m.group('key')))
1772
1773
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false.

    filter_str consists of conditions separated by '&'; all of them
    have to be met. """
    filter_parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in filter_parts)
1779
1780
def match_filter_func(filter_str):
    """ Build a function that checks an info dict against filter_str.

    The function returns None if the dict passes the filter and a
    message explaining why the video is skipped otherwise. """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
1789
1790
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ Proxy handler that lets a single request choose its proxy through
    the 'Ytdl-request-proxy' header; the header is removed from the
    request before it is passed on """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            # The per-request proxy takes precedence over the given one
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)