]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[cspan] Extract subtitles
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
912b38b4 6import calendar
676eb3f2 7import codecs
62e609ab 8import contextlib
e3946f98 9import ctypes
c496ca96
PH
10import datetime
11import email.utils
f45c185f 12import errno
be4a824d 13import functools
d77c3dfd 14import gzip
b7ab0590 15import itertools
03f9daab 16import io
f4bfd65f 17import json
d77c3dfd 18import locale
02dbf93f 19import math
347de493 20import operator
d77c3dfd 21import os
4eb7f1d1 22import pipes
c496ca96 23import platform
d77c3dfd 24import re
13ebea79 25import ssl
c496ca96 26import socket
b53466e1 27import struct
1c088fa8 28import subprocess
d77c3dfd 29import sys
181c8655 30import tempfile
01951dda 31import traceback
bcf89ce6 32import xml.etree.ElementTree
d77c3dfd 33import zlib
d77c3dfd 34
8c25f81b 35from .compat import (
8f9312c3 36 compat_basestring,
8c25f81b 37 compat_chr,
8c25f81b 38 compat_html_entities,
be4a824d 39 compat_http_client,
8c25f81b 40 compat_parse_qs,
be4a824d 41 compat_socket_create_connection,
8c25f81b
PH
42 compat_str,
43 compat_urllib_error,
44 compat_urllib_parse,
45 compat_urllib_parse_urlparse,
46 compat_urllib_request,
47 compat_urlparse,
7d4111ed 48 shlex_quote,
8c25f81b 49)
4644ac55
S
50
51
468e2e92
FV
# The re module exposes no public name for the type of a compiled pattern,
# so take it from a throwaway compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
67
68
def preferredencoding():
    """Return the name of the preferred text encoding.

    The value comes from locale.getpreferredencoding(). If that call fails,
    or names an encoding that cannot actually encode text, 'UTF-8' is
    returned instead.
    """
    try:
        encoding_name = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable encoding name raises here.
        'TEST'.encode(encoding_name)
        return encoding_name
    except Exception:
        return 'UTF-8'
d77c3dfd 82
f4bfd65f 83
def write_json_file(obj, fn):
    """Encode obj as JSON and write it to fn, atomically if possible.

    The JSON is first written to a temporary file created next to fn
    (named '<basename of fn>.<random>.tmp') and then renamed over fn.
    If anything fails, the temporary file is removed (best effort) and the
    original exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; the original error is
        # what the caller needs to see, so cleanup failures are ignored.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
136
137
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """Find the first element matching xpath[@key=val] below node.

        Returns the element, or None when nothing matches.
        """
        # key and val are interpolated into the XPath expression, so only
        # characters that cannot break out of the predicate are accepted.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val):
        """Find the first element matching xpath whose attribute key equals val.

        Returns the element, or None when nothing matches.
        """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        matching = (
            element for element in node.findall(xpath)
            if element.attrib.get(key) == val)
        return next(matching, None)
156
d7e66d39
JMF
157# On python2.6 the xml.etree.ElementTree.Element methods don't support
158# the namespace parameter
5f6a1245
JW
159
160
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    ns_map maps each prefix used in path to its namespace URI. Steps
    without a prefix are left untouched.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
171
d77c3dfd 172
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element matching xpath below node.

    When no element matches, or the matching element has no text, None is
    returned unless fatal is set, in which case ExtractorError is raised.
    name is used in the error message in place of xpath, if given.
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    element = node.find(xpath)
    if element is not None and element.text is not None:
        return element.text
    if not fatal:
        return None
    label = xpath if name is None else name
    raise ExtractorError('Could not find XML element %s' % label)
185
186
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id in html.

    Thin wrapper around get_element_by_attribute for the "id" attribute.
    """
    return get_element_by_attribute('id', id, html)
190
12ea2f30 191
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag in html with attribute=value.

    The content is HTML-unescaped before being returned. None is returned
    when no such tag is found.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s*>
         (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if found is None:
        return None

    content = found.group('content')
    # Drop one leading and one trailing character when the content starts
    # with a quote character.
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 213
9e6dd238
FV
214
def clean_html(html):
    """Turn an HTML snippet into readable plain text.

    Newlines become spaces, <br> tags and paragraph boundaries become
    newlines, all remaining tags are removed, entities are unescaped and
    surrounding whitespace is stripped. None is passed through unchanged
    (convenience for sanitizing optional descriptions etc.).
    """
    if html is None:
        return html

    text = html.replace('\n', ' ')
    substitutions = (
        # <br> / <br/> (with surrounding whitespace) -> newline
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        # </p><p ...> -> newline
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # any other tag -> removed
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return unescapeHTML(text).strip()
9e6dd238
FV
230
231
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    The filename '-' stands for standard output (its binary buffer when
    available). If opening fails for a reason other than EACCES, the name
    is passed through sanitize_path() and, when that changes it, the open
    is attempted again; a failure of that second attempt propagates to the
    caller, like the standard open() function would.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            if hasattr(sys.stdout, 'buffer'):
                stdout_stream = sys.stdout.buffer
            else:
                stdout_stream = sys.stdout
            return (stdout_stream, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming the file
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
262
263
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a UNIX timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 271
5f6a1245 272
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so that it can be used as part of a filename.

    If restricted is set, a stricter subset of characters is allowed
    (no whitespace, no non-ASCII, no shell-special punctuation).
    Set is_id if s is not an arbitrary string but an ID that should be
    kept as intact as possible: the cosmetic clean-ups (collapsing and
    stripping underscores, fixing a leading '-' or '.') are then skipped.
    """
    def replace_insane(char):
        code = ord(char)
        # '?', control characters and DEL are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores, then drop them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 309
5f6a1245 310
a2aaf4db
S
def sanitize_path(s):
    """Sanitize and normalize a path on Windows.

    On any other platform s is returned unchanged. On Windows the path is
    normalized and, in every component other than '.' and '..', each
    character that is forbidden in Windows file names (/ < > : " | \\ ? *)
    as well as a trailing dot is replaced with '#'. A drive or UNC prefix
    is preserved as-is.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    # Before Python 2.7, splitdrive() does not recognize UNC prefixes
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component in front of the root separator
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
327
328
92a4793b
S
def sanitize_url_path_consecutive_slashes(url):
    """Collapse runs of consecutive slashes in the path part of url.

    Only the path component is touched; scheme, host, query and fragment
    are reassembled unchanged.
    """
    components = tuple(compat_urlparse.urlparse(url))
    collapsed_path = re.sub(r'/{2,}', '/', components[2])
    return compat_urlparse.urlunparse(
        components[:2] + (collapsed_path,) + components[3:])
334
335
def orderedSet(iterable):
    """Return a list of the items of iterable without duplicates.

    The first occurrence of each item is kept and the original order is
    preserved. Items only need to support equality comparison.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 343
912b38b4 344
4e408e47
PH
def _htmlentity_transform(entity):
    """Transform an HTML entity into the character it stands for.

    entity is the text between '&' and ';'. Named entities and decimal or
    hexadecimal numeric references are supported. Anything that cannot be
    converted (unknown name, code point outside the valid range) is
    returned in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() needs the '0x' prefix form for base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Not a valid code point (e.g. &#x110000;): keep the text
            # as-is instead of failing the whole unescape operation
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
363
364
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    s must be a text string (compat_str); None is passed through.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
d77c3dfd 372
8bf48f23
PH
373
def encodeFilename(s, for_subprocess=False):
    """Return s in the form expected by file system (or subprocess) APIs.

    @param s The name of the file
    @param for_subprocess Whether the result is passed to a subprocess call

    On Python 3 the text is returned unchanged. On Python 2 it is encoded
    to bytes, except on Windows 2000 and later where text is passed through
    for regular file APIs. Characters the encoding cannot represent are
    dropped.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_capable_windows = (
        sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)

    if unicode_capable_windows and not for_subprocess:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        return s

    if unicode_capable_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
400
f07b74fc
PH
401
def encodeArgument(s):
    """Encode a command line argument for use in a subprocess call.

    Byte strings are still accepted for legacy callers and are decoded as
    ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # TODO: once all post processors pass text, fail here instead:
    # 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
409
410
8271226a
PH
def decodeOption(optval):
    """Return an option value as text.

    Byte strings are decoded with the preferred encoding; None is passed
    through. Any other non-text value trips the assertion.
    """
    if optval is None:
        return optval

    if isinstance(optval, bytes):
        text = optval.decode(preferredencoding())
    else:
        text = optval

    assert isinstance(text, compat_str)
    return text
1c256f70 419
5f6a1245 420
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: durations of an hour or more
    include hours, durations of a minute or more include minutes, and
    anything shorter is printed as plain seconds.
    """
    # >= rather than >: exactly one hour is '1:00:00' (not '60:00') and
    # exactly one minute is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
428
a0ddb8a2 429
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    params is the options dictionary; its 'nocheckcertificate' entry turns
    certificate verification off. Extra keyword arguments are forwarded to
    the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            # check_hostname has to be disabled before verify_mode can be
            # set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 453
732ea2f0 454
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages that look like bugs.

    It asks the user to report the issue and explains how to update, with
    the update hint depending on ytdl_is_updateable().
    """
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    sentences = [
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ]
    return ''.join(sentences)
464
465
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """Build the error message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not
        a bug in youtube-dl; otherwise the bug report hint is appended.
        cause, if given, is appended to the message; video_id, if given, is
        prepended to it.
        """
        # Errors raised while handling one of these exceptions are
        # never treated as bugs.
        currently_handled = sys.exc_info()[0]
        if currently_handled in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None."""
        if self.traceback is None:
            return None
        formatted_lines = traceback.format_tb(self.traceback)
        return ''.join(formatted_lines)
01951dda 493
1c256f70 494
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for an unsupported URL (kept in .url)."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
500
501
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match."""
505
506
class DownloadError(Exception):
    """Download Error exception.

    May be thrown by FileDownloader objects that are not configured to
    continue on errors. It carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """Store the message and the originating exception information.

        exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info()).
        """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
d77c3dfd
FV
519
520
class SameFileError(Exception):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
528
529
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        """Store msg both as the exception message and as the msg attribute."""
        # Initialize the base class as well so that str(exc) and exc.args
        # carry the message on every Python version.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 539
5f6a1245 540
class MaxDownloadsReached(Exception):
    """Signals that the --max-downloads limit has been reached."""
d77c3dfd
FV
544
545
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
553
554
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        """Record how many bytes were downloaded and how many were expected."""
        # Give the exception a readable message instead of leaving the
        # base class uninitialized.
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 569
5f6a1245 570
c5a59d93 571def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
be4a824d
PH
572 hc = http_class(*args, **kwargs)
573 source_address = ydl_handler._params.get('source_address')
574 if source_address is not None:
575 sa = (source_address, 0)
576 if hasattr(hc, 'source_address'): # Python 2.7+
577 hc.source_address = sa
578 else: # Python 2.6
579 def _hc_connect(self, *args, **kwargs):
580 sock = compat_socket_create_connection(
581 (self.host, self.port), self.timeout, sa)
582 if is_https:
d7932313
PH
583 self.sock = ssl.wrap_socket(
584 sock, self.key_file, self.cert_file,
585 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
586 else:
587 self.sock = sock
588 hc.connect = functools.partial(_hc_connect, hc)
589
590 return hc
591
592
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Create the handler; params is the options dictionary, kept as
        self._params. All other arguments go to the base HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req over plain HTTP, creating the connection through
        _create_http_connection()."""
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate-encoded data.

        Raw deflate (no zlib header) is tried first; if that fails, the
        data is decompressed as a regular zlib stream.
        """
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response object carrying the status code.

        When addinfourl has no getcode attribute, the code is not passed to
        the constructor and is set as an attribute afterwards instead.
        """
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Add the standard headers to req and process the internal
        no-compression marker header. Returns the (modified) request."""
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        # The marker header only asks for an uncompressed response; it is
        # removed from the request together with Accept-encoding.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Return resp with a gzip or deflate Content-encoding decoded.

        The decoded body is wrapped in a new response object that reuses
        the headers, URL, status code and message of the original one.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; the first
                # length that decompresses cleanly wins.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the original failure
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic gets exactly the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 684
5de90176 685
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that creates its connections through
    _create_http_connection()."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Create the handler.

        params is the options dictionary, kept as self._params.
        https_conn_class is the connection class to instantiate; it
        defaults to compat_http_client.HTTPSConnection. All remaining
        arguments are passed to the base HTTPSHandler.
        """
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req over HTTPS, forwarding the SSL settings that the base
        handler stored on this instance (if any)."""
        open_kwargs = {}
        # '_context' exists on python > 2.6, '_check_hostname' on python 3.x
        for option in ('context', 'check_hostname'):
            private_name = '_' + option
            if hasattr(self, private_name):
                open_kwargs[option] = getattr(self, private_name)

        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
701
702
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given ISO 8601 date string.

    delimiter is the separator between the date and the time part.
    timezone, if given, is a datetime.timedelta holding the UTC offset of
    date_str; otherwise a trailing 'Z' or '+HH:MM' / '-HHMM' style offset
    (and fractional seconds in front of it) is parsed and removed from the
    string, and UTC is assumed when there is none. None is passed through.
    """

    if date_str is None:
        return None

    if timezone is None:
        # UTC unless the string carries an explicit offset
        timezone = datetime.timedelta()
        suffix = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if suffix:
            date_str = date_str[:-len(suffix.group(0))]
            if suffix.group('sign'):
                direction = 1 if suffix.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(suffix.group('hours')),
                    minutes=direction * int(suffix.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(utc_time.timetuple())
727
728
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD.

    date_str may be in any of a number of common date formats. day_first
    decides how ambiguous numeric dates such as 01/02/2014 are read
    (day/month when set, month/day otherwise). Returns None when date_str
    is None or cannot be parsed.
    """

    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so cut the offset
    # off, unless the whole string is a DD-MM-YYYY style date whose tail
    # would be mistaken for one
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    common_formats = (
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    )
    day_first_formats = (
        '%d-%m-%Y',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%d/%m/%y',
        '%d/%m/%Y %H:%M:%S',
    )
    month_first_formats = (
        '%m-%d-%Y',
        '%m.%d.%Y',
        '%m/%d/%Y',
        '%m/%d/%y',
        '%m/%d/%Y %H:%M:%S',
    )
    ambiguous_formats = day_first_formats if day_first else month_first_formats

    upload_date = None
    # Every format is tried; if several match, the last one wins
    for expression in common_formats + ambiguous_formats:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
792
5f6a1245 793
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    The extension is whatever follows the last dot in front of the query
    string. default_ext is returned when url is None or when the guess is
    not purely alphanumeric.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 802
5f6a1245 803
def subtitles_filename(filename, sub_lang, sub_format):
    """Return the subtitle file name that belongs to filename.

    The last extension of filename (if any) is replaced with
    '<sub_lang>.<sub_format>'.
    """
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + sub_lang + '.' + sub_format
d4051a8e 806
5f6a1245 807
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError (from strptime) for anything else.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # timedelta knows neither months nor years, so approximate them
        # with 30 and 365 days respectively
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta keyword arguments use the plural form (days, weeks)
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
835
836
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that do not consist of exactly eight digits are returned
    unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
845
5f6a1245 846
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates (both inclusive)"""

    def __init__(self, start=None, end=None):
        """Create the range.

        start and end must be strings in a format accepted by
        date_from_str(). A missing start or end leaves the range open on
        that side (earliest / latest representable date). Raises ValueError
        when start lies after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a datetime.date or a date string) is in the range"""
        if isinstance(date, datetime.date):
            candidate = date
        else:
            candidate = date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
876
877
def platform_name():
    """Return platform.platform() as a compat_str.

    A bytes result is decoded with the preferred encoding first.
    """
    name = platform.platform()
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
886
887
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is the text to write and out the stream it is meant for. The special
    method is the WriteConsoleW function of kernel32, which is only used
    when out is backed by file descriptor 1 or 2 and that descriptor is
    attached to a console. Raises OSError if WriteConsoleW reports failure.
    """
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors (1: stdout, 2: stderr) to the identifiers
    # GetStdHandle expects (STD_OUTPUT_HANDLE / STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Look up the kernel32 functions with explicit prototypes
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True unless handle is a valid character device for which
        # GetConsoleMode succeeds
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, each ending before the
    # next non-BMP character; a non-BMP character at the front is written
    # on its own.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
961
962
def write_string(s, out=None, encoding=None):
    """ Write the compat_str s to out (sys.stderr by default) and flush.

    On win32, when no encoding is given and the stream has a fileno,
    _windows_write_string is tried first. Otherwise the text is encoded
    (ignoring unencodable characters) for binary streams, Python 2, and
    streams exposing a .buffer; any other stream receives s unchanged.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = (
        sys.platform == 'win32' and encoding is None and
        hasattr(stream, 'fileno'))
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = (
        'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3)
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        chosen = (
            encoding or getattr(stream, 'encoding', None) or
            preferredencoding())
        stream.buffer.write(s.encode(chosen, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
983
984
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string into a list of integer byte values """
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Indexing yielded a 1-character string: convert each with ord()
        return [ord(ch) for ch in bs]
    # Python 3: indexing already yields ints
    return list(bs)
992
c257baff 993
def intlist_to_bytes(xs):
    """ Pack a list of integer byte values into a byte string """
    if xs:
        # One unsigned-char ('B') field per list element.
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
998
999
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f): via LockFileEx /
# UnlockFileEx on win32, via fcntl.flock everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure passed by pointer as the last argument of
        # LockFileEx / UnlockFileEx.
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to (Un)LockFileEx.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock f starting at offset 0; raises OSError if LockFileEx fails.
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file can pass the same
        # structure back.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # NOTE(review): 0x2 is presumably LOCKFILE_EXCLUSIVE_LOCK - confirm
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release a lock taken by _lock_file; raises OSError on failure.
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        # Exclusive lock when requested, shared lock otherwise.
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1063
1064
class locked_file(object):
    """ Context manager around io.open() that holds a file lock.

    The file is opened in __init__; entering the context locks it
    (shared for mode 'r', exclusive otherwise) and leaving the context
    unlocks and closes it.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.mode = mode
        self.f = io.open(filename, mode, encoding=encoding)

    def __enter__(self):
        try:
            # Anything but a pure read takes an exclusive lock.
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the open file when locking fails.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        handle = self.f
        return handle.write(*args)

    def read(self, *args):
        handle = self.f
        return handle.read(*args)
4eb7f1d1
JMF
1094
1095
4644ac55
S
def get_filesystem_encoding():
    """ Return sys.getfilesystemencoding(), or 'utf-8' if that is None """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1099
1100
def shell_quote(args):
    """ Quote every argument with pipes.quote and join them with spaces """
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(encoding)
        return arg

    return ' '.join([pipes.quote(as_text(arg)) for arg in args])
9d4660ca
PH
1110
1111
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yields the first element
    for which pred is false before stopping """
    for item in seq:
        yield item
        if pred(item):
            continue
        break
1119
1120
9d4660ca
PH
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The JSON-encoded data is carried as a url-encoded
    # '__youtubedl_smuggle' parameter appended after a '#'.
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    encoded = compat_urllib_parse.urlencode(payload)
    return url + '#' + encoded
9d4660ca
PH
1127
1128
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data).

    Returns (smug_url, default) when the URL carries no
    '#__youtubedl_smuggle' part.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        fields = compat_parse_qs(sdata)
        return url, json.loads(fields['__youtubedl_smuggle'][0])
    return smug_url, default
02dbf93f
PH
1136
1137
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary suffix, e.g. '1.50KiB'.

    Returns 'N/A' for None. A str argument is converted with float().
    The suffix index is clamped to the available suffixes, so values of
    1024 ** 9 and above are expressed in YiB and positive values below
    1 / 1024 are expressed in B.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Without the clamp, a huge value indexes past the list and a
        # tiny fraction yields a negative index (wrapping to 'YiB').
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 1150
1c088fa8 1151
be64b5b0
PH
def parse_filesize(s):
    """ Parse a size such as '1.5 MiB' or '5GB' into an integer number
    of bytes; returns None for None or when s does not start with a
    number followed by a known unit """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and inofficial,
    # but we support those too
    unit_table = [('B', 1), ('b', 1)]
    for power, prefix in enumerate('KMGTPEZY', 1):
        unit_table.extend([
            (prefix + 'iB', 1024 ** power),
            (prefix + 'B', 1000 ** power),
            (prefix.lower() + 'B', 1024 ** power),
            (prefix + 'b', 1000 ** power),
        ])
    multipliers = dict(unit_table)

    units_re = '|'.join(re.escape(unit) for unit, _ in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if m is None:
        return None

    # A comma is accepted as the decimal separator.
    number = float(m.group('num').replace(',', '.'))
    return int(number * multipliers[m.group('unit')])
be64b5b0
PH
1204
1205
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    if name in ENGLISH_MONTH_NAMES:
        return ENGLISH_MONTH_NAMES.index(name) + 1
    # Unknown names yield None
    return None
1213
1214
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    # Abbreviations are the first three letters of each month name.
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
18258362
JMF
1223
1224
def fix_xml_ampersands(xml_str):
    """Replace '&' by '&amp;' in XML unless it already starts one of the
    named entities amp/lt/gt/apos/quot or a numeric character reference"""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
e3946f98
PH
1231
1232
def setproctitle(title):
    """ Best-effort: set the process title via prctl() from libc.so.6.

    Returns silently when libc.so.6 cannot be loaded or does not expose
    prctl. title must be a compat_str; it is passed UTF-8 encoded.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger
    # than the title, so it is NUL-terminated. A buffer sized exactly
    # len(title_bytes) would hand prctl an unterminated string.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # NOTE(review): 15 is presumably PR_SET_NAME - confirm
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1246
1247
def remove_start(s, start):
    """ Return s without the prefix start, or s unchanged if absent """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1252
1253
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end, or s unchanged if absent """
    # An empty suffix must be a no-op: s[:-0] is s[:0], i.e. empty.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1258
1259
def url_basename(url):
    """ Return the last '/'-separated segment of the URL's path """
    parsed = compat_urlparse.urlparse(url)
    segments = parsed.path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
1263
1264
class HEADRequest(compat_urllib_request.Request):
    """ Request whose get_method() always reports HEAD """
    _METHOD = "HEAD"

    def get_method(self):
        return self._METHOD
7217e148
PH
1268
1269
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Return int(v) * invscale // scale, or default if v is None or ''.

    When get_attr is given and v is not None, the named attribute of v
    (None if missing) is used in place of v.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '' or v is None:
        return default
    return int(v) * invscale // scale
1277
9572013d 1278
40a90862
JMF
def str_or_none(v, default=None):
    """ Return compat_str(v), or default when v is None """
    if v is None:
        return default
    return compat_str(v)
1281
9732d77e
PH
1282
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # ',', '.' and '+' are dropped before conversion.
    return (
        None if int_str is None
        else int(re.sub(r'[,\.\+]', '', int_str)))
608d11f5
PH
1289
1290
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Return float(v) * invscale / scale, or default when v is None """
    if v is None:
        return default
    return float(v) * invscale / scale
43f775e4
PH
1293
1294
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '3h11m53s' or '3 min' into
    seconds; returns None for non-strings and unrecognised input """
    if not isinstance(s, compat_basestring):
        return None

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s.strip())
    if m is None:
        return None

    # A lone (possibly fractional) minutes or hours figure is converted
    # directly.
    only_mins = m.group('only_mins')
    if only_mins:
        return float_or_none(only_mins, invscale=60)
    only_hours = m.group('only_hours')
    if only_hours:
        return float_or_none(only_hours, invscale=60 * 60)

    # Integer components and the number of seconds each one is worth.
    seconds_per_unit = (
        ('secs', 1),
        ('mins_reversed', 60),
        ('mins', 60),
        ('hours', 60 * 60),
        ('hours_reversed', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    res = 0
    for group_name, factor in seconds_per_unit:
        component = m.group(group_name)
        if component:
            res += int(component) * factor
    # The fractional part ('.123') turns the result into a float.
    fraction = m.group('ms')
    if fraction:
        res += float(fraction)
    return res
91d7d0b3
JMF
1339
1340
def prepend_extension(filename, ext):
    """ Insert ext before the real extension: ('a.mp4', 'tmp') -> 'a.tmp.mp4' """
    parts = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(parts[0], ext, parts[1])
d70ad093
PH
1344
1345
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        # The binary could not be started
        return False
    return exe
b7ab0590
PH
1354
1355
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # stderr is merged into stdout so either stream can carry
        # the version banner.
        process = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = process.communicate()[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
1369
1370
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Return group 1 of the first version_re match in output, or
    unrecognized when nothing matches. version_re defaults to a pattern
    matching 'version <something>'. """
    assert isinstance(output, compat_str)
    pattern = (
        r'version\s+([-0-9._a-zA-Z]+)' if version_re is None
        else version_re)
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1380
1381
class PagedList(object):
    """ Base for paged lists; relies on a getslice() method """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1386
9c44d242
PH
1387
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one by one through
    pagefunc(pagenum) and stops as soon as a short page is seen or the
    requested end is reached """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the elements in [start, end) as a list; end=None
        means up to the first page that is not full """
        res = []
        # Begin at the page that contains index `start`.
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first element of this page and of
            # the next one.
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of `start` inside this page (0 on later pages).
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of `end` inside this page, or None when `end` does
            # not fall on this page.
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1429
1430
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is given up front """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the elements in [start, end) as a list; end=None
        means through page number pagecount - 1 """
        res = []
        start_page = start // self._pagesize
        # One past the last page to fetch.
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Elements to drop from the front of the first fetched page.
        skip_elems = start - start_page * self._pagesize
        # Elements still wanted; None means no limit.
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                # Only the first fetched page is trimmed at the front.
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the remaining demand.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1458
1459
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape sequence in s by the character
    it denotes """
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, length consumed)
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
b53466e1 1466
d05cfe06
S
1467
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Characters passed to quote() as safe, i.e. left unescaped.
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
1473
1474
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only these components are escaped; the others are kept as parsed.
    escaped = dict(
        (field, escape_rfc3986(getattr(parts, field)))
        for field in ('path', 'params', 'query', 'fragment'))
    return parts._replace(**escaped).geturl()
1484
# Probe whether struct.pack accepts the kind of format string this module
# uses, and define struct_pack / struct_unpack accordingly.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        # Encode a text format spec to ASCII before delegating.
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        # Encode a text format spec to ASCII before delegating.
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # The probe succeeded: use the struct functions directly.
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1501
1502
def read_batch_urls(batch_fd):
    """ Read URLs from a batch file object, one per line, and close it.

    Lines are decoded as UTF-8 when they are not text already, a leading
    byte order mark is removed, surrounding whitespace is stripped, and
    empty lines as well as lines starting with '#', ';' or ']' are
    dropped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 BOM decodes to '\ufeff' when read as UTF-8; the
        # three-character form covers input where each BOM byte became
        # its own character. str.strip() removes neither.
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1517
1518
def urlencode_postdata(*args, **kargs):
    """ urlencode() the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1521
1522
0990305d
PH
# etree_iter(node) iterates over the elements of an ElementTree node:
# Element.iter where it exists, a findall('.//*') fallback otherwise.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
1527
1528
bcf89ce6
PH
def parse_xml(s):
    """ Parse the text s as XML, ignoring doctype declarations, and
    return the root element """
    etree = xml.etree.ElementTree

    class DoctypeIgnoringBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = etree.XMLParser(target=DoctypeIgnoringBuilder())
    # The parser keyword is only passed on Python 2.7 and later.
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = etree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            text = node.text
            if text is not None and not isinstance(text, compat_str):
                node.text = text.decode('utf-8')
    return tree
e68301af
PH
1544
1545
a1a530b0
PH
# Maps a rating label to the numeric age limit used for it.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
1553
1554
146c80e2
S
def parse_age_limit(s):
    """ Turn '18', '18+' or a US_RATINGS label into an integer age
    limit; returns None for None and for unknown values """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1560
1561
def strip_jsonp(code):
    """ Strip a JSONP wrapper 'callback(...);' (optionally followed by
    // comments) and return the wrapped payload; other input is
    returned unchanged """
    jsonp_wrapper = re.compile(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$')
    return jsonp_wrapper.sub(r'\1', code)
478c2c61
PH
1565
1566
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal to JSON text: single-quoted
    strings and bare identifiers become double-quoted strings, and a
    comma directly before a closing ']' or '}' is removed """
    # Rewrites applied inside a formerly single-quoted string.
    requote = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        # JSON literals and double-quoted strings pass through untouched.
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: requote[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    quoted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*[\]}])', r'\1', quoted)
1590
1591
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The rank is the position in quality_ids; unknown ids get -1.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1600
acd69589
PH
1601
# Default output template: %-style named fields title, id and ext.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1603
a020a0dc
PH
1604
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    ELLIPSES = '...'
    # None and strings that already fit are returned as they are.
    if s is None or len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1613
1614
def version_tuple(v):
    """ Split a version string on '-' and '.' into a tuple of ints """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
1617
1618
def is_outdated_version(version, limit, assume_new=True):
    """ Return True if version is older than limit. A missing or
    unparsable version counts as outdated only when assume_new is
    false. """
    when_unknown = not assume_new
    if not version:
        return when_unknown
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return when_unknown
732ea2f0
PH
1626
1627
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updateable when this module was loaded by a zipimporter or when
    # sys has a 'frozen' attribute.
    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
1633
1634
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
2ccd1b10
PH
1638
1639
c460bdd5
PH
def mimetype2ext(mt):
    """ Map a MIME type to a file extension: the part after the last
    '/', with special cases for a few subtypes """
    subtype = mt.rpartition('/')[2]
    special_cases = {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }
    return special_cases.get(subtype, subtype)
1647
1648
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess a file extension from a response: prefer the filename in
    Content-Disposition, fall back to the Content-Type """
    try:
        url_handle.headers

        def getheader(h):
            return url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    disposition = getheader('Content-Disposition')
    if disposition:
        found = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = (
            determine_ext(found.group('filename'), default_ext=None)
            if found else None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1665
1666
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1675
1676
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # Checked in order, so the 4-byte UTF-32 marks are tried before the
    # 2-byte UTF-16 marks they begin with.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, skip = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, skip = enc, len(bom)
            break
    s = first_bytes[skip:].decode(encoding, 'replace')

    # Truthy (a match object) when the first non-blank character is '<'
    return re.match(r'^\s*<', s)
a055469f
PH
1695
1696
def determine_protocol(info_dict):
    """ Return info_dict['protocol'] if set, else derive the protocol
    from the URL prefix, the URL extension, or the URL scheme """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1717
1718
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Widest rendered value of every column.
    max_lens = [
        max(len(compat_str(value)) for value in column)
        for column in zip(*table)]
    # Every column but the last is left-justified and padded.
    padded = ['%-' + compat_str(width + 1) + 's' for width in max_lens[:-1]]
    format_str = ' '.join(padded) + '%s'
    return '\n'.join([format_str % tuple(row) for row in table])
347de493
PH
1725
1726
def _match_one(filter_part, dct):
    """ Evaluate one filter expression against dct.

    Two forms are accepted: 'key OP value' (OP one of the comparison
    operators below, optionally followed by '?') and 'key' / '!key',
    which test whether the key is not None / is None.
    Raises ValueError for anything else.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            # String operands can only be tested for (in)equality.
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a file size, first as
                # written, then with a 'B' appended.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # With no value present, the result is the matched '?' text
            # (truthy) or None when no '?' was given.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
1784
1785
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # Parts are separated by '&'; evaluation stops at the first failure.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
1791
1792
def match_filter_func(filter_str):
    """ Build a function that takes an info dict and returns None when
    it passes filter_str, or a message saying it is being skipped """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
1801
1802
1803class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
1804 def __init__(self, proxies=None):
1805 # Set default handlers
1806 for type in ('http', 'https'):
1807 setattr(self, '%s_open' % type,
1808 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
1809 meth(r, proxy, type))
1810 return compat_urllib_request.ProxyHandler.__init__(self, proxies)
1811
91410c9b 1812 def proxy_open(self, req, proxy, type):
2461f79d 1813 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
1814 if req_proxy is not None:
1815 proxy = req_proxy
2461f79d
PH
1816 del req.headers['Ytdl-request-proxy']
1817
1818 if proxy == '__noproxy__':
1819 return None # No Proxy
91410c9b
PH
1820 return compat_urllib_request.ProxyHandler.proxy_open(
1821 self, req, proxy, type)