]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[utils] Disallow trailing dot in sanitize_path for a path part
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
912b38b4 6import calendar
676eb3f2 7import codecs
62e609ab 8import contextlib
e3946f98 9import ctypes
c496ca96
PH
10import datetime
11import email.utils
f45c185f 12import errno
be4a824d 13import functools
d77c3dfd 14import gzip
b7ab0590 15import itertools
03f9daab 16import io
f4bfd65f 17import json
d77c3dfd 18import locale
02dbf93f 19import math
347de493 20import operator
d77c3dfd 21import os
4eb7f1d1 22import pipes
c496ca96 23import platform
d77c3dfd 24import re
13ebea79 25import ssl
c496ca96 26import socket
b53466e1 27import struct
1c088fa8 28import subprocess
d77c3dfd 29import sys
181c8655 30import tempfile
01951dda 31import traceback
bcf89ce6 32import xml.etree.ElementTree
d77c3dfd 33import zlib
d77c3dfd 34
8c25f81b 35from .compat import (
8f9312c3 36 compat_basestring,
8c25f81b 37 compat_chr,
8c25f81b 38 compat_html_entities,
be4a824d 39 compat_http_client,
8c25f81b 40 compat_parse_qs,
be4a824d 41 compat_socket_create_connection,
8c25f81b
PH
42 compat_str,
43 compat_urllib_error,
44 compat_urllib_parse,
45 compat_urllib_parse_urlparse,
46 compat_urllib_request,
47 compat_urlparse,
7d4111ed 48 shlex_quote,
8c25f81b 49)
4644ac55
S
50
51
468e2e92
FV
# The re module exposes no public name for the compiled-pattern class,
# so take it from a throwaway compiled pattern
compiled_regex_type = type(re.compile(''))

# Default HTTP headers; YoutubeDLHandler.http_request adds each of them to
# a request that does not already carry that header
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# English month names, index 0 being January
ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
67
68
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the reported
    encoding cannot actually be used to encode text, 'UTF-8' is returned
    instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; the reported name may be unusable
        'TEST'.encode(pref)
    except Exception:
        # Any lookup or encoding failure falls back to UTF-8, while
        # KeyboardInterrupt and SystemExit are allowed to propagate
        pref = 'UTF-8'

    return pref
d77c3dfd 82
f4bfd65f 83
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is written to a temporary file created in the directory of fn
    and that file is then renamed to fn. If anything fails, the temporary
    file is removed (best effort) and the error is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()
        # os.path.basename / os.path.dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use a unicode object
        tmp_prefix = os.path.basename(fn).decode(fs_encoding)
        tmp_dir = os.path.dirname(fn).decode(fs_encoding)
    else:
        tmp_prefix = os.path.basename(fn)
        tmp_dir = os.path.dirname(fn)

    tmp_options = {
        'suffix': '.tmp',
        'prefix': tmp_prefix + '.',
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**tmp_options)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except:  # deliberate catch-all: clean up, then re-raise unchanged
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
136
137
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are spliced into the expression unescaped, so only
        # harmless characters are accepted
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element matched by xpath whose attribute key equals val """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        candidates = (
            el for el in node.findall(xpath)
            if el.attrib.get(key) == val)
        return next(candidates, None)
156
d7e66d39
JMF
157# On python2.6 the xml.etree.ElementTree.Element methods don't support
158# the namespace parameter
5f6a1245
JW
159
160
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand the prefix:tag steps of path into {uri}tag form.

    Each '/'-separated step of the form prefix:tag is rewritten using the
    URI that ns_map holds for prefix; steps without a colon are kept as-is.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
171
d77c3dfd 172
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element matching xpath below node.

    If no element matches or the match has no text, None is returned,
    unless fatal is set, in which case ExtractorError is raised; name (or
    the xpath itself when name is None) is used in the error message.
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    element = node.find(xpath)
    if element is not None and element.text is not None:
        return element.text
    if not fatal:
        return None
    label = xpath if name is None else name
    raise ExtractorError('Could not find XML element %s' % label)
185
186
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin convenience wrapper: an ID lookup is an attribute lookup on "id"
    return get_element_by_attribute(attribute='id', value=id, html=html)
190
12ea2f30 191
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document

    The first tag carrying attribute=value is located with a regular
    expression and its HTML-unescaped inner content is returned; None is
    returned when no such tag is found.
    """

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s*>
         (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))
    found = re.search(pattern, html)
    if found is None:
        return None

    content = found.group('content')
    # Content that begins with a quote character loses its first and last character
    if content[:1] in ('"', "'"):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 213
9e6dd238
FV
214
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Source newlines carry no meaning; <br> and paragraph breaks do
    text = html.replace('\n', ' ')
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip every remaining html tag
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities, then trim surrounding whitespace
    return unescapeHTML(text).strip()
9e6dd238
FV
230
231
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename '-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        # Prefer the underlying binary buffer where stdout has one
        return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
    except (IOError, OSError) as err:
        # A permission problem is not solved by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
262
263
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Unparseable input yields None rather than an exception
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 271
5f6a1245 272
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'__+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 309
5f6a1245 310
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged. On Windows the path is
    normalized and, in each path part after the drive or UNC prefix, the
    characters / < > : " | \\ ? * as well as a trailing dot are replaced
    with '#'. The special parts '.' and '..' are kept as they are.
    """
    if sys.platform != 'win32':
        return s
    drive, _ = os.path.splitdrive(s)
    # os.path.splitunc was removed in Python 3.7; where it is missing,
    # splitdrive already returns the UNC share as the drive
    splitunc = getattr(os.path, 'splitunc', None)
    unc = splitunc(s)[0] if splitunc is not None else ''
    unc_or_drive = unc or drive
    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
    if unc_or_drive:
        # The remainder starts with a separator, so the first part is empty
        norm_path.pop(0)
    sanitized_path = [
        # '.' and '..' are navigation parts, not names ending in a dot
        path_part if path_part in ('.', '..')
        else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if unc_or_drive:
        sanitized_path.insert(0, unc_or_drive + os.path.sep)
    return os.path.join(*sanitized_path)
327
328
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element in its
    original order. Elements are compared by equality.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 336
912b38b4 337
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and numeric
    references (decimal '#38' or hexadecimal '#x26') are decoded; anything
    else is returned in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hexadecimal references must accept the digits a-f as well; matching
    # decimal digits only would cut '#x2F' short to '#x2'
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Not a valid code point: fall through to the literal form
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
356
357
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through unchanged; any other value must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        # group(1) is the entity text between '&' and ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
d77c3dfd 365
8bf48f23
PH
366
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is passed to a subprocess call
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up have Unicode APIs
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        # Pass the unicode string directly to use those APIs
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
393
f07b74fc
PH
394
def encodeArgument(s):
    """Encode s the way encodeFilename does for subprocess calls.

    Byte strings are still accepted and decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # TODO: fail here instead once all post processors are fixed, e.g.
    # 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
402
403
8271226a
PH
def decodeOption(optval):
    """Return optval as a compat_str, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 412
5f6a1245 413
4539dd30
PH
def formatSeconds(secs):
    """Format a duration given in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: hours appear from 3600 seconds
    on and minutes from 60 seconds on.
    """
    # The comparisons are inclusive so that exactly one hour renders as
    # '1:00:00' (not '60:00') and exactly one minute as '1:00' (not '60')
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
421
a0ddb8a2 422
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is switched off when params has a true
    'nocheckcertificate' entry. Extra keyword arguments are handed on to
    the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 446
732ea2f0 447
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is appended to the message; video_id, if given, is prepended to it.
        """

        # An error raised while one of these exceptions is being handled is
        # always treated as expected
        expected_origins = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_origins:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get bug-report and update instructions
            update_cmd = (
                'type youtube-dl -U to update'
                if ytdl_is_updateable()
                else 'see https://yt-dl.org/update on how to update')
            msg += ''.join((
                '; please report this issue on https://yt-dl.org/bug .',
                ' Make sure you are using the latest version; %s.' % update_cmd,
                ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
            ))
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        tb = self.traceback
        return None if tb is None else ''.join(traceback.format_tb(tb))
01951dda 481
1c256f70 482
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Error for an unsupported URL; always flagged as expected."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        # Keep the offending URL available to handlers
        self.url = url
488
489
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
493
494
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
507
508
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
516
517
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # The message is kept on the instance as .msg
        self.msg = msg
d77c3dfd 527
5f6a1245 528
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
532
533
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
541
542
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Class-level defaults; both sizes are in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 557
5f6a1245 558
c5a59d93 559def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
be4a824d
PH
560 hc = http_class(*args, **kwargs)
561 source_address = ydl_handler._params.get('source_address')
562 if source_address is not None:
563 sa = (source_address, 0)
564 if hasattr(hc, 'source_address'): # Python 2.7+
565 hc.source_address = sa
566 else: # Python 2.6
567 def _hc_connect(self, *args, **kwargs):
568 sock = compat_socket_create_connection(
569 (self.host, self.port), self.timeout, sa)
570 if is_https:
d7932313
PH
571 self.sock = ssl.wrap_socket(
572 sock, self.key_file, self.cert_file,
573 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
574 else:
575 self.sock = sock
576 hc.connect = functools.partial(_hc_connect, hc)
577
578 return hc
579
580
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class adds the standard
    headers to every HTTP request and transparently decodes gzipped and
    deflated responses from web servers. To avoid compression for a
    particular request, the program code only has to include the HTTP
    header "Youtubedl-No-Compression" in the original request; it is
    removed again before the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_factory = functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False)
        return self.do_open(conn_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and a zlib-wrapped one second."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, setting code by hand where the constructor does not take it."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    @staticmethod
    def _gunzip(content):
        """Return the gunzipped content, tolerating junk at its end."""
        try:
            return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()
        except IOError as original_ioerror:
            # There may be junk at the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            for trim in range(1, 1024):
                try:
                    return gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb').read()
                except IOError:
                    continue
            raise original_ioerror

    def http_request(self, req):
        for header, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if header.capitalize() not in req.headers:
                req.add_header(header, value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            uncompressed = io.BytesIO(self._gunzip(resp.read()))
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic gets the same header and decompression treatment
    https_request = http_request
    https_response = http_response
bf50b038 672
5de90176 673
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that creates its connections through _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        # Fall back to the stock HTTPS connection class when none is given
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward only the settings this Python version's handler has:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        extra = {}
        for option, attr in (('context', '_context'), ('check_hostname', '_check_hostname')):
            if hasattr(self, attr):
                extra[option] = getattr(self, attr)

        conn_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(conn_factory, req, **extra)
be4a824d
PH
689
690
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str has the form YYYY-MM-DD<delimiter>HH:MM:SS. If timezone is
    None, a trailing 'Z' or +HH:MM / -HHMM offset (with optional
    fractional seconds in front) is stripped and applied; otherwise
    timezone is the timedelta offset to subtract. None yields None.
    """

    if date_str is None:
        return None

    if timezone is None:
        # No recognizable suffix, or a plain 'Z', means a zero offset
        timezone = datetime.timedelta()
        m = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if m:
            date_str = date_str[:-len(m.group(0))]
            if m.group('sign'):
                sign = 1 if m.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=sign * int(m.group('hours')),
                    minutes=sign * int(m.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(utc_time.timetuple())
715
716
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    day_first selects how ambiguous numeric dates are read: day before
    month when set, month before day otherwise. None is returned for None
    and for input that cannot be parsed.
    """

    if date_str is None:
        return None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    unambiguous_formats = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        ambiguous_formats = [
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ]
    else:
        ambiguous_formats = [
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ]

    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for expression in unambiguous_formats + ambiguous_formats:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            continue
    if upload_date is None:
        # Last resort: the RFC 2822 style date parser
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
777
5f6a1245 778
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension of url.

    The text after the last dot of the part before the query string is
    returned if it is purely alphanumeric; in every other case, and for a
    None url, default_ext is returned.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 787
5f6a1245 788
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 791
5f6a1245 792
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    The words 'now', 'today' and 'yesterday' are accepted on their own as
    well. A string in none of these forms raises ValueError.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # timedelta knows neither months nor years, so both are
        # approximated in days (a month as 30, a year as 365)
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta keyword arguments are plural ('days', 'weeks')
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
820
821
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    A string that is not in 'YYYYMMDD' format is returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '%s-%s-%s' % parts.groups()
830
5f6a1245 831
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start means the earliest representable date, a missing
        end the latest one. ValueError is raised if start is after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(start=day, end=day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date object is parsed as a date string
        when = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= when <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
c496ca96
PH
861
862
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
871
872
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # AttributeError: the output stream doesn't have a fileno, it's virtual
        # UnsupportedOperation: some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    kernel32 = ctypes.windll.kernel32
    wintypes = ctypes.wintypes

    GetStdHandle = ctypes.WINFUNCTYPE(
        wintypes.HANDLE, wintypes.DWORD)(
        (b"GetStdHandle", kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE, wintypes.LPWSTR,
        wintypes.DWORD, ctypes.POINTER(wintypes.DWORD),
        wintypes.LPVOID)((b"WriteConsoleW", kernel32))
    written = wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(
        wintypes.DWORD, wintypes.DWORD)((b"GetFileType", kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE,
        ctypes.POINTER(wintypes.DWORD))(
        (b"GetConsoleMode", kernel32))
    INVALID_HANDLE_VALUE = wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
            return True
        return GetConsoleMode(handle, ctypes.byref(wintypes.DWORD())) == 0

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        for i, c in enumerate(s):
            if ord(c) > 0xffff:
                return i
        return len(s)

    while s:
        # Write at most 1024 characters and stop in front of a non-BMP one
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if count:
            assert written.value > 0
            s = s[written.value:]
        else:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
    return True
946
947
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr when out is None) and flush.

    On win32, when no explicit encoding is requested, the console API
    helper is tried first. Otherwise the text is encoded (ignoring
    unencodable characters) for binary streams and for streams exposing
    a .buffer, or written as-is to plain text streams.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = (
        sys.platform == 'win32' and encoding is None and
        hasattr(stream, 'fileno'))
    if try_console and _windows_write_string(s, stream):
        return

    is_binary = 'b' in getattr(stream, 'mode', '')
    # Python 2 lies about mode of sys.stderr
    if is_binary or sys.version_info[0] < 3:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        charset = (
            encoding or getattr(stream, 'encoding', None) or
            preferredencoding())
        stream.buffer.write(s.encode(charset, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
968
969
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints ([] when empty)."""
    if not bs:
        return []
    # Python 3 bytes index to ints; Python 2 str indexes to 1-char strings
    return list(bs) if isinstance(bs[0], int) else [ord(ch) for ch in bs]
977
c257baff 978
def intlist_to_bytes(xs):
    """Pack a sequence of ints (one unsigned byte each) into a bytes object."""
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
983
984
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx via ctypes on win32, fcntl.flock
# everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high words of the byte count passed to LockFileEx/UnlockFileEx
    # (presumably chosen to cover the whole file - confirm)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the open file f; raises OSError if LockFileEx fails. """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same
        # structure to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 for an exclusive lock, 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Unlock a file locked by _lock_file; raises OSError on failure. """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ Take an exclusive (LOCK_EX) or shared (LOCK_SH) flock on f. """
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """ Release the flock held on f. """
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1048
1049
class locked_file(object):
    """Context manager wrapping a file that is locked while in use.

    The file is opened in __init__ (mode must be 'r', 'a' or 'w'). Entering
    the context takes a shared lock for 'r' and an exclusive lock otherwise;
    leaving it releases the lock and always closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 _lock_file raises OSError, which is not an IOError
            # on Python 2; catch both so the handle is never leaked when
            # locking fails.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1079
1080
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if it reports None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1084
1085
def shell_quote(args):
    """Return args joined into a single shell-escaped command string.

    Bytes arguments (e.g. produced by encodeFilename) are decoded with the
    filesystem encoding before quoting.
    """
    encoding = get_filesystem_encoding()
    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # Use the shlex_quote compat helper (as args_to_str does) rather
        # than pipes.quote: the pipes module is deprecated and no longer
        # exists in recent Python versions.
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1095
1096
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yields the first element for
    which pred is false before stopping """
    for item in seq:
        yield item
        if pred(item):
            continue
        break
1104
1105
9d4660ca
PH
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded and appended to url as a URL-encoded fragment
    under the key '__youtubedl_smuggle'. """
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1112
1113
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data).

    Returns (smug_url, default) when the URL carries no smuggled data. """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
1121
1122
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a human-readable string with a binary suffix.

    bytes may be None (returns 'N/A'), a number, or a str holding a number.
    The result has two decimals, e.g. '1.50KiB'. Values beyond the largest
    suffix are expressed in YiB and positive values below one byte in B.
    Negative values raise ValueError (from math.log).

    The parameter name shadows the builtin; it is kept for compatibility
    with keyword callers.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available suffixes: huge values would otherwise
        # raise IndexError and tiny fractions would index from the end
        # of the list (yielding 'YiB').
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 1135
1c088fa8 1136
be64b5b0
PH
def parse_filesize(s):
    """Parse a size such as '5 MiB' or '1,24 KB' into a number of bytes.

    Returns an int, or None if s is None or does not start with a number
    followed by a known unit. A comma is accepted as decimal separator.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and inofficial,
    # but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1}
    for power, letter in enumerate('KMGTPEZY', 1):
        binary, decimal = 1024 ** power, 1000 ** power
        _UNIT_TABLE[letter + 'iB'] = binary
        _UNIT_TABLE[letter + 'B'] = decimal
        _UNIT_TABLE[letter.lower() + 'B'] = binary
        _UNIT_TABLE[letter + 'b'] = decimal

    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re
    m = re.match(pattern, s)
    if m is None:
        return None

    number = float(m.group('num').replace(',', '.'))
    return int(number * _UNIT_TABLE[m.group('unit')])
be64b5b0
PH
1189
1190
caefb1de
PH
def month_by_name(name):
    """ Return the number (1-12) of a month given its English name,
    independently of the locale; None for an unknown name """
    if name in ENGLISH_MONTH_NAMES:
        return ENGLISH_MONTH_NAMES.index(name) + 1
    return None
1198
1199
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month given the first three letters
    of its English name, independently of the locale; None if unknown """
    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
1208
1209
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity by '&amp;'.

    The predefined entities (amp, lt, gt, apos, quot) and numeric
    character references of up to four digits are left untouched.
    """
    bare_ampersand = (
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
1216
1217
def setproctitle(title):
    """Best-effort: set the process name to title through libc's prctl.

    title must be a compat_str. The call is silently skipped when
    libc.so.6 cannot be loaded or does not provide prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one item larger than
    # the data, so the C string handed to prctl is NUL-terminated. (Sizing
    # it to len(title_bytes) and assigning .value left no terminator.)
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1231
1232
def remove_start(s, start):
    """Return s without the prefix start (s unchanged if it is absent)."""
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1237
1238
2b9faf55
PH
def remove_end(s, end):
    """Return s without the suffix end (s unchanged if it is absent)."""
    # An empty suffix must be a no-op: s[:-len('')] is s[:0], which would
    # wrongly return an empty string.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1243
1244
def url_basename(url):
    """Return the last component of the path of url ('' if the path is empty)."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
1248
1249
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is reported as HEAD."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
1253
1254
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default for None or ''.

    If get_attr is given, that attribute of v is converted instead (a
    missing attribute counts as None). The result is
    int(v) * invscale // scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    return int(v) * invscale // scale
1262
9572013d 1263
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1266
9732d77e
PH
1267
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: ',', '.' and '+' characters
    are dropped before conversion; None is passed through """
    if int_str is None:
        return None
    return int(re.sub(r'[,\.\+]', '', int_str))
608d11f5
PH
1274
1275
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default when v is None."""
    if v is None:
        return default
    return float(v) * invscale / scale
43f775e4
PH
1278
1279
def parse_duration(s):
    """Parse a textual duration and return it as a number of seconds.

    Returns None when s is not a string or does not match any supported
    form. The result is a float for the minutes-only and hours-only forms
    and when a fractional seconds part is present, otherwise an int.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    # Alternatives, tried in order (case-insensitive, optional leading
    # 'T' or 'PT'):
    #   1. a (possibly fractional) number of minutes only
    #   2. a (possibly fractional) number of hours only
    #   3. hours followed by minutes, with no seconds part
    #   4. [[[days] hours] minutes] seconds[.fraction]
    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None
    res = 0
    # The two "only" forms are complete on their own
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    # Otherwise sum up whichever components matched
    if m.group('secs'):
        res += int(m.group('secs'))
    if m.group('mins_reversed'):
        res += int(m.group('mins_reversed')) * 60
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('hours_reversed'):
        res += int(m.group('hours_reversed')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    if m.group('ms'):
        # The 'ms' group includes the leading dot, i.e. it is the
        # fractional part of the seconds
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1324
1325
def prepend_extension(filename, ext):
    """Insert ext before the existing extension: 'a.mp4', 'tmp' -> 'a.tmp.mp4'."""
    stem, real_ext = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(stem, ext, real_ext)
d70ad093
PH
1329
1330
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary cannot be started. """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1339
1340
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr output of running exe with args is handed
    to detect_exe_version. """
    try:
        process = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1354
1355
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the output of an executable.

    Returns group 1 of the first match of version_re (by default the token
    following 'version') in output, or unrecognized if nothing matches.
    """
    assert isinstance(output, compat_str)
    pattern = (
        r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re)
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1365
1366
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
1371
9c44d242
PH
1372
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one by one until a short page or the
    requested end is reached.

    pagefunc(pagenum) must return an iterable with the entries of that
    page; pagesize is the number of entries on a full page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list
        (up to the last available entry when end is None)."""
        res = []
        # Begin with the page that contains the start index
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first entry of this page / the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset inside this page at which the slice starts
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset inside this page at which the slice ends, or None if
            # the page is taken up to its last entry
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1414
1415
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list whose total number of pages is known in advance.

    pagefunc(pagenum) must return an iterable with the entries of that
    page; pagecount is the number of available pages and pagesize the
    number of entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list
        (up to the last page when end is None)."""
        res = []
        start_page = start // self._pagesize
        # Never request pages beyond the known page count, even if the
        # requested end lies past the last entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted (None means no limit)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1443
1444
def uppercase_escape(s):
    """Replace every literal \\UXXXXXXXX escape in s by the character it denotes."""
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
b53466e1 1451
d05cfe06
S
1452
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a text string is UTF-8 encoded before being quoted
    must_encode = sys.version_info < (3, 0) and isinstance(s, compat_str)
    text = s.encode('utf-8') if must_encode else s
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(text, safe_chars)
d05cfe06
S
1458
1459
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only these four components are escaped; scheme and netloc are kept
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
1469
# Define struct_pack/struct_unpack: the plain struct functions when they
# accept a text format string, otherwise wrappers that encode the format
# to ASCII bytes first.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1486
1487
def read_batch_urls(batch_fd):
    """Read the URLs contained in a batch file object and close it.

    batch_fd yields lines as text or as bytes (decoded as UTF-8). A leading
    byte order mark is removed, lines are stripped, and empty lines as well
    as lines starting with '#', ';' or ']' are skipped. Returns a list of
    URL strings.
    """
    # A UTF-8 BOM shows up as U+FEFF once the data has been decoded as
    # UTF-8; the second form is what the same three bytes look like when
    # they were decoded byte-for-byte (latin-1) instead.
    boms = ('\ufeff', '\xef\xbb\xbf')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in boms:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1502
1503
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1506
1507
0990305d
PH
# etree_iter(node) iterates over the elements of a tree
if hasattr(xml.etree.ElementTree.Element, 'iter'):
    etree_iter = xml.etree.ElementTree.Element.iter
else:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1512
1513
bcf89ce6
PH
def parse_xml(s):
    """Parse the XML document in the text string s and return its root
    element; doctype declarations are ignored."""
    class _DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(
        target=_DoctypeIgnoringBuilder())
    # The custom parser is only passed on Python 2.7 and newer
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            text = node.text
            if text is not None and not isinstance(text, compat_str):
                node.text = text.decode('utf-8')
    return tree
e68301af
PH
1529
1530
a1a530b0
PH
# Age limit associated with each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """Return the age limit described by s as an int.

    s is either a one- or two-digit number with an optional trailing '+'
    or a key of US_RATINGS. Returns None for None and for anything else.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1545
1546
def strip_jsonp(code):
    """Remove a JSONP wrapper 'callback(...)' and return the inner payload.

    An optional trailing ';' and trailing '//' comments are removed too.
    Input that does not look like JSONP is returned unchanged.
    """
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_wrapper, r'\1', code)
478c2c61
PH
1550
1551
e05f6939
PH
def js_to_json(code):
    """Convert a JavaScript object literal into a JSON string.

    Single-quoted strings and bare identifiers are turned into
    double-quoted strings, and a comma directly before a closing ']' is
    removed. Double-quoted strings and the literals true, false and null
    are kept as they are.
    """
    def fix_kv(m):
        # Called for every string literal or identifier found in code
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            return v
        if v.startswith("'"):
            # Drop the single quotes and re-escape the content for a
            # double-quoted string: \\ stays, \' becomes ', " becomes \"
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    # Matches, in this order: double-quoted strings, single-quoted strings,
    # identifiers (dots allowed after the first character)
    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Remove a trailing comma before a closing square bracket
    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
    return res
1575
1576
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.
    The returned function maps an id to its index in quality_ids,
    or to -1 if the id is not in the list. """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1585
acd69589
PH
1586
# Default output template: a %-style format string using the keys
# 'title', 'id' and 'ext'
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1588
a020a0dc
PH
1589
def limit_length(s, length):
    """ Shorten s to at most length characters, ending it with an
    ellipsis if it had to be cut. None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1598
1599
def version_tuple(v):
    """Split a version string on '-' and '.' and return a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
1602
1603
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    When version is empty or one of the two cannot be parsed, the answer
    is 'not assume_new'.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
732ea2f0
PH
1611
1612
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U, i.e. whether this
    module was loaded by a zipimporter or sys has a 'frozen' attribute """
    from zipimport import zipimporter

    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
1618
1619
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    return ' '.join(map(shlex_quote, args))
2ccd1b10
PH
1623
1624
c460bdd5
PH
def mimetype2ext(mt):
    """Return the file extension for a MIME type.

    This is the part after the last '/', except for a few subtypes that
    are mapped to their usual extension.
    """
    subtype = mt.rpartition('/')[2]
    special_cases = {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }
    return special_cases.get(subtype, subtype)
1632
1633
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """Guess the file extension from the headers of an opened URL.

    The filename of an attachment Content-Disposition header takes
    precedence; otherwise the extension is derived from Content-Type.
    """
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(h):
            return url_handle.headers[h]

    disposition = getheader('Content-Disposition')
    if disposition:
        m = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = (
            determine_ext(m.group('filename'), default_ext=None)
            if m else None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1650
1651
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1660
1661
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.
    Returns a match object (truthy) if the decoded text starts with '<'
    after optional whitespace, None otherwise. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a byte order mark the data is treated as UTF-8
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    s = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', s)
a055469f
PH
1680
1681
def determine_protocol(info_dict):
    """Return the download protocol for an info dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: its rtmp/mms/rtsp prefix, its m3u8/f4m
    extension, or finally its URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1702
1703
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values.
    Every column but the last is left-aligned and padded to the width of
    its longest value plus one; rows are joined with newlines. """
    rows = [header_row] + data
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    padded_cells = ['%-' + compat_str(w + 1) + 's' for w in widths[:-1]]
    row_format = ' '.join(padded_cells) + '%s'
    return '\n'.join(row_format % tuple(r) for r in rows)
347de493
PH
1710
1711
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against the dictionary dct.

    Supported forms are 'key OP value' (OP being one of the comparison
    operators below, optionally followed by '?') and the unary forms
    'key' / '!key'. Returns a truthy value if dct passes the filter.
    Raises ValueError if filter_part cannot be parsed or uses an ordering
    operator with a string value.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key, operator, optional '?', then either a number (optionally with a
    # size suffix) or a plain alphanumeric string
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            # Strings can only be tested for (in)equality
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            # Plain integer first, then a file size, then a file size
            # with an implied trailing 'B'
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # A missing value only passes when the operator was followed
            # by '?' (the matched text is returned, else None)
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary forms: 'key' passes if the value is present, '!key' if absent
    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
1769
1770
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false.
    filter_str consists of '&'-separated parts that must all match. """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
1776
1777
def match_filter_func(filter_str):
    """Build a match filter function from the filter string filter_str.

    The returned function takes an info dict and returns None if it passes
    the filter, or a message explaining why the video is skipped.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
1786
1787
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets a single request choose its own proxy through
    the 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        # http_open/https_open are installed with the placeholder proxy
        # '__noproxy__'; type and meth are bound as lambda defaults so
        # each handler keeps its own values
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy given in the request header overrides the passed one;
        # the header is removed from the request afterwards
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)