]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[teamcoco] Fix extraction
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
912b38b4 6import calendar
676eb3f2 7import codecs
62e609ab 8import contextlib
e3946f98 9import ctypes
c496ca96
PH
10import datetime
11import email.utils
f45c185f 12import errno
be4a824d 13import functools
d77c3dfd 14import gzip
b7ab0590 15import itertools
03f9daab 16import io
f4bfd65f 17import json
d77c3dfd 18import locale
02dbf93f 19import math
347de493 20import operator
d77c3dfd 21import os
4eb7f1d1 22import pipes
c496ca96 23import platform
d77c3dfd 24import re
13ebea79 25import ssl
c496ca96 26import socket
b53466e1 27import struct
1c088fa8 28import subprocess
d77c3dfd 29import sys
181c8655 30import tempfile
01951dda 31import traceback
bcf89ce6 32import xml.etree.ElementTree
d77c3dfd 33import zlib
d77c3dfd 34
8c25f81b 35from .compat import (
8f9312c3 36 compat_basestring,
8c25f81b 37 compat_chr,
8c25f81b 38 compat_html_entities,
be4a824d 39 compat_http_client,
8c25f81b 40 compat_parse_qs,
be4a824d 41 compat_socket_create_connection,
8c25f81b
PH
42 compat_str,
43 compat_urllib_error,
44 compat_urllib_parse,
45 compat_urllib_parse_urlparse,
46 compat_urllib_request,
47 compat_urlparse,
7d4111ed 48 shlex_quote,
8c25f81b 49)
4644ac55
S
50
51
468e2e92
FV
# This is not clearly defined otherwise: derive the type of a compiled
# regular expression from an actual compiled (empty) pattern.
compiled_regex_type = type(re.compile(''))
54
# Default HTTP headers. YoutubeDLHandler.http_request adds each of these to
# an outgoing request unless the request already carries that header.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 62
5f6a1245 63
7105440c
YCH
# Full English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
67
68
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails or names an encoding that cannot actually be used,
    'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; the locale may report an encoding Python lacks
        'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (lookup and codec failures raise various
        # types), but no longer swallows KeyboardInterrupt / SystemExit
        pref = 'UTF-8'

    return pref
d77c3dfd 82
f4bfd65f 83
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created in the same
    directory as fn, which is then renamed over fn. If anything fails the
    temporary file is removed (best effort) and the error is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except BaseException:
        # Clean up on *any* failure (including KeyboardInterrupt), then
        # propagate the original error unchanged
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
136
137
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element matching xpath whose attribute key equals val """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        # ElementTree in 2.6 has no attribute predicates, so filter by hand
        candidates = (el for el in node.findall(xpath) if el.attrib.get(key) == val)
        return next(candidates, None)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the expression, so only a
        # conservative character set is accepted
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
156
d7e66d39
JMF
157# On python2.6 the xml.etree.ElementTree.Element methods don't support
158# the namespace parameter
5f6a1245
JW
159
160
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path to '{namespace-uri}tag' using ns_map.

    Steps without a prefix are kept as they are. An unknown prefix raises
    KeyError; a step with more than one ':' raises ValueError.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, local_name = pieces
        return '{%s}%s' % (ns_map[prefix], local_name)

    return '/'.join([expand(step) for step in path.split('/')])
171
d77c3dfd 172
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element below node matching xpath.

    If there is no such element, or it has no text, None is returned;
    with fatal set an ExtractorError is raised instead, naming the element
    as name (or the xpath itself when name is None).
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None and found.text is not None:
        return found.text
    if not fatal:
        return None
    label = xpath if name is None else name
    raise ExtractorError('Could not find XML element %s' % label)
185
186
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id in the passed HTML document"""
    # An id lookup is simply an attribute lookup on "id"
    return get_element_by_attribute('id', id, html)
190
12ea2f30 191
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document

    Only the first matching tag is considered; None is returned when no tag
    carries attribute=value. HTML entities in the content are unescaped.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s*>
         (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if found is None:
        return None

    content = found.group('content')
    # Drop one surrounding character on each side if the content starts
    # with a quote
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 213
9e6dd238
FV
214
def clean_html(html):
    """Clean an HTML snippet into a readable string

    None is passed through unchanged (convenience for sanitizing
    descriptions etc.).
    """
    if html is None:
        return html

    # Literal newlines carry no meaning in HTML ...
    text = html.replace('\n', ' ')
    # ... while <br> and paragraph boundaries do; afterwards every
    # remaining tag is stripped. The order of these steps matters.
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
230
231
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The filename '-' stands for standard output. Otherwise the file is
    opened as given; if that fails (for any reason but EACCES) the open is
    retried once with the name run through sanitize_path(). When that does
    not change the name, or the retry fails too, the exception propagates
    like it would from the standard open() function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the underlying binary stream where there is one
            stdout = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (stdout, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be cured by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
262
263
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 271
5f6a1245 272
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally bans shell-unfriendly characters,
        # whitespace and everything outside ASCII
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub('_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 309
5f6a1245 310
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged. On Windows the path
    is normalized and, in each component except '.' and '..', characters
    that are not allowed in file names (and a trailing dot) are replaced
    with '#'. A leading drive letter or UNC share is preserved.
    """
    if sys.platform != 'win32':
        return s
    drive, _ = os.path.splitdrive(s)
    # os.path.splitunc was removed in Python 3.7; where it is gone,
    # os.path.splitdrive already recognizes UNC paths
    splitunc = getattr(os.path, 'splitunc', None)
    unc = splitunc(s)[0] if splitunc is not None else ''
    unc_or_drive = unc or drive
    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
    if unc_or_drive:
        # The remainder starts with a separator, so the first part is empty
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if unc_or_drive:
        sanitized_path.insert(0, unc_or_drive + os.path.sep)
    return os.path.join(*sanitized_path)
327
328
92a4793b
S
def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    scheme, netloc, path, params, query, fragment = compat_urlparse.urlparse(url)
    # Only the path component is touched; the '//' after the scheme and
    # anything in the query or fragment stay as they are
    collapsed_path = re.sub(r'/{2,}', '/', path)
    return compat_urlparse.urlunparse(
        [scheme, netloc, collapsed_path, params, query, fragment])
334
335
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of every element, in the
    original order. Elements are compared with ==, so they need not be
    hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 343
912b38b4 344
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and numeric
    character references (decimal '#38' or hexadecimal '#x26') are
    decoded; anything else is returned in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hexadecimal references may contain the digits a-f, in either case
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Not a valid code point; fall through to the literal form
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
363
364
def unescapeHTML(s):
    """Replace every '&...;' entity in s with the character it stands for.

    None is passed through; any other value must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
d77c3dfd 372
8bf48f23
PH
373
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form the file system API (or, with for_subprocess,
    a subprocess call) of the running Python expects.

    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up have Unicode APIs
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
400
f07b74fc
PH
401
def encodeArgument(s):
    """Encode s for use as a subprocess argument (see encodeFilename)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
409
410
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as a compat_str.

    None is passed through; byte strings are decoded with the preferred
    encoding.
    """
    if optval is None:
        return optval

    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 419
5f6a1245 420
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or plain 'S'.

    The shortest form that fits is used; the boundaries are inclusive, so
    exactly one hour is '1:00:00' and exactly one minute is '1:00'.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
428
a0ddb8a2 429
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python.

    params is the options dictionary; certificate verification is turned
    off when its 'nocheckcertificate' entry is true. Additional keyword
    arguments are handed on to the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 453
732ea2f0 454
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is appended to the message; video_id, if given, is prepended.
        """

        # Errors raised while handling a network or availability problem
        # are always considered expected
        handled_type = sys.exc_info()[0]
        if handled_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get bug-reporting instructions appended
            if ytdl_is_updateable():
                update_cmd = 'type youtube-dl -U to update'
            else:
                update_cmd = 'see https://yt-dl.org/update on how to update'
            msg += ''.join([
                '; please report this issue on https://yt-dl.org/bug .',
                ' Make sure you are using the latest version; %s.' % update_cmd,
                ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
            ])
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
01951dda 488
1c256f70 489
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected ExtractorError for a URL that is not supported; keeps the URL in .url"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
495
496
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
500
501
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that the original error can still be inspected later
        self.exc_info = exc_info
d77c3dfd
FV
514
515
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
523
524
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        """ msg is the error message; it is also available as .msg """
        # Chain to Exception so the message is set up the regular way
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 534
5f6a1245 535
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
539
540
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
548
549
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        """ downloaded and expected are the received and announced sizes, in bytes """
        # Give the exception a readable message instead of a bare tuple
        super(ContentTooShortError, self).__init__(
            'Downloaded %s bytes, expected %s bytes' % (downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 564
5f6a1245 565
c5a59d93 566def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
be4a824d
PH
567 hc = http_class(*args, **kwargs)
568 source_address = ydl_handler._params.get('source_address')
569 if source_address is not None:
570 sa = (source_address, 0)
571 if hasattr(hc, 'source_address'): # Python 2.7+
572 hc.source_address = sa
573 else: # Python 2.6
574 def _hc_connect(self, *args, **kwargs):
575 sock = compat_socket_create_connection(
576 (self.host, self.port), self.timeout, sa)
577 if is_https:
d7932313
PH
578 self.sock = ssl.wrap_socket(
579 sock, self.key_file, self.cert_file,
580 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
581 else:
582 self.sock = sock
583 hc.connect = functools.partial(_hc_connect, hc)
584
585 return hc
586
587
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """ params is the options dictionary; it is kept as self._params """
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """ Open req over a connection created by _create_http_connection """
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        """ Decompress deflate-encoded data """
        try:
            # Negative wbits: raw deflate stream without a zlib header
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Otherwise retry assuming a regular zlib-wrapped stream
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """ Build an addinfourl response, setting code by hand where the constructor does not take it """
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """ Add the missing std_headers to req and honour Youtubedl-no-compression """
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            # Internal marker header: drop Accept-encoding and the marker itself
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """ Return resp with a gzip or deflate Content-encoding transparently decoded """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if no attempt
                # succeeds, the for-else re-raises the original error
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic gets the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 679
5de90176 680
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler whose connections are created by _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """ params is the options dictionary; https_conn_class overrides the
        connection class (default: compat_http_client.HTTPSConnection) """
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """ Open req, passing on the SSL settings of the base handler where it has them """
        extra = {}
        # '_context' exists on python > 2.6, '_check_hostname' on python 3.x
        for attribute, option in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attribute):
                extra[option] = getattr(self, attribute)

        make_connection = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(make_connection, req, **extra)
be4a824d
PH
696
697
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter separates the date from the time part. timezone, if given,
    is a datetime.timedelta holding the UTC offset of date_str; otherwise
    a trailing 'Z' or '+HH:MM' / '-HHMM' designator (optionally preceded
    by fractional seconds) is stripped from date_str and used, and a
    string without one is taken to be UTC. None is passed through.
    """

    if date_str is None:
        return None

    if timezone is None:
        timezone = datetime.timedelta()
        tz_match = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if tz_match:
            # Cut fractional seconds and the designator off the end
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                direction = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(tz_match.group('hours')),
                    minutes=direction * int(tz_match.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(utc_time.timetuple())
722
723
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    day_first decides whether ambiguous numeric dates such as 8/7/2009
    are read as day/month (the default) or month/day. None is returned
    for None and for strings that cannot be parsed.
    """

    if date_str is None:
        return None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    if day_first:
        ambiguous_formats = [
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ]
    else:
        ambiguous_formats = [
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ]
    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ] + ambiguous_formats

    # Every format is tried; if several match, the last one wins
    upload_date = None
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            continue
    if upload_date is not None:
        return upload_date

    # Last resort: dates in the e-mail (RFC 2822) style
    timetuple = email.utils.parsedate_tz(date_str)
    if not timetuple:
        return None
    return datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
784
5f6a1245 785
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from url.

    The guess is whatever follows the last '.' before the query string;
    it is only accepted if it is purely alphanumeric. Otherwise, and for
    a url of None, default_ext is returned.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 794
5f6a1245 795
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with everything after its last '.' replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + sub_lang + '.' + sub_format
d4051a8e 798
5f6a1245 799
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError for any other string."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a regular literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation? timedelta has no months or years, so
        # they are counted as 30 and 365 days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keywords (days=, weeks=)
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
827
828
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings in any other format are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
837
5f6a1245 838
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start or end leaves the range open on that side (the
        smallest / largest representable date is used).
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not a date is parsed like the constructor arguments
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
868
869
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
878
879
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the Win32 WriteConsoleW call; it is only used
    when out is file descriptor 1 or 2 and is attached to a console.
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for an invalid handle, a handle that is not a character
        # device, or one for which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before the
        # next non-BMP character; count is 0 when s starts with one
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            # It counted as 2 written units but is one character of s
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
953
954
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it.

    On win32, when no encoding is given and the stream has a fileno,
    _windows_write_string is tried first. Otherwise the text is encoded
    (dropping unencodable characters) for byte-oriented streams, or
    written as-is for plain text streams.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = (
        sys.platform == 'win32' and encoding is None and
        hasattr(stream, 'fileno'))
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr, so always write bytes there
    wants_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        target_encoding = (
            encoding or getattr(stream, 'encoding', None) or
            preferredencoding())
        stream.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
975
976
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string into a list of integer byte values """
    if not bs:
        return []
    # Indexing yields ints on Python 3, one-character strings otherwise
    already_ints = isinstance(bs[0], int)
    return list(bs) if already_ints else [ord(ch) for ch in bs]
984
c257baff 985
def intlist_to_bytes(xs):
    """ Pack a sequence of integer byte values into a byte string """
    if xs:
        # One unsigned-byte field ('B') per element
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
990
991
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f): via
# LockFileEx/UnlockFileEx on win32, via fcntl.flock everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure handed to LockFileEx/UnlockFileEx as the last argument
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the open file f; raises OSError if LockFileEx fails """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file needs it again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags: 0x2 when an exclusive lock is requested, 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Unlock a file locked by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ Take an exclusive (LOCK_EX) or shared (LOCK_SH) flock on f """
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """ Release the flock held on f """
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1055
1056
class locked_file(object):
    """ Context manager around a file that is locked while in use.

    The file is opened in the constructor. Entering the context locks it
    (shared for mode 'r', exclusive for 'a' and 'w'); leaving the context
    unlocks and closes it.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 _lock_file raises OSError, which is not an IOError
            # on Python 2; catch both so the file never stays open after
            # a failed lock
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Always close the file, even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1086
1087
4644ac55
S
def get_filesystem_encoding():
    """ Return the file system encoding, or 'utf-8' if Python reports None """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1091
1092
def shell_quote(args):
    """ Quote every argument for the shell and join them with spaces.

    Byte-string arguments are decoded with the file system encoding
    first. Quoting uses the shlex_quote compat helper, as args_to_str
    does, instead of the deprecated pipes.quote.
    """
    encoding = get_filesystem_encoding()
    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1102
1103
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yield the first element for
    which pred(e) is false before stopping """
    for item in seq:
        # Yield first, test afterwards, so the failing element is included
        yield item
        if pred(item):
            continue
        break
1111
1112
9d4660ca
PH
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The data travels JSON-encoded in a single query-style key that is
    # appended as the URL fragment
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    fragment = compat_urllib_parse.urlencode(payload)
    return url + '#' + fragment
9d4660ca
PH
1119
1120
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data).

    If the URL carries no smuggled payload, (smug_url, default) is
    returned instead.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        encoded = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
1128
1129
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary unit suffix, e.g. '1.50KiB'.

    bytes may be a number, a numeric string, or None (which gives 'N/A').
    The unit is chosen by repeated division by 1024 rather than a
    floating point logarithm. This keeps exact powers of 1024 in the
    right unit, lets negative values and fractions below 1 work, and
    reports values beyond the largest unit in YiB instead of raising.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # Choose the unit from the magnitude so that the sign is irrelevant
    magnitude = abs(float(bytes))
    exponent = 0
    while magnitude >= 1024.0 and exponent < len(suffixes) - 1:
        magnitude /= 1024.0
        exponent += 1

    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 1142
1c088fa8 1143
be64b5b0
PH
def parse_filesize(s):
    """ Parse a size such as '5 KiB' or '1,5MB' into a number of bytes.

    Returns None if s is None or does not start with a number followed
    by a known unit.
    """
    if s is None:
        return None

    # Per prefix letter X: 'XiB' and 'xB' are binary (1024 ** n), 'XB' and
    # 'Xb' are decimal (1000 ** n). The lower-case forms are incorrect and
    # unofficial, but they are supported too.
    _UNIT_TABLE = {'B': 1, 'b': 1}
    for power, prefix in enumerate('KMGTPEZY', 1):
        binary, decimal = 1024 ** power, 1000 ** power
        _UNIT_TABLE[prefix + 'iB'] = binary
        _UNIT_TABLE[prefix + 'B'] = decimal
        _UNIT_TABLE[prefix.lower() + 'B'] = binary
        _UNIT_TABLE[prefix + 'b'] = decimal

    units_re = '|'.join(re.escape(unit) for unit in _UNIT_TABLE)
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if found is None:
        return None

    # A comma is accepted as the decimal separator
    number = float(found.group('num').replace(',', '.'))
    return int(number * _UNIT_TABLE[found.group('unit')])
be64b5b0
PH
1196
1197
caefb1de
PH
def month_by_name(name):
    """ Return the number (1-12) of a month given its English name,
    independently of the locale, or None if the name is unknown """
    if name in ENGLISH_MONTH_NAMES:
        return ENGLISH_MONTH_NAMES.index(name) + 1
    return None
1205
1206
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month given the first three letters
    of its English name, independently of the locale, or None """
    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
18258362
JMF
1215
1216
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a recognised entity
    (amp, lt, gt, apos, quot or a short numeric reference) by '&amp;'"""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
e3946f98
PH
1223
1224
def setproctitle(title):
    """ Set the process name through prctl of libc.so.6.

    Silently does nothing when that library cannot be loaded or has no
    prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes (rather than from their
    # length) allocates one extra item, so the string handed to prctl is
    # always NUL-terminated
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1238
1239
def remove_start(s, start):
    """ Return s without the prefix start; s is returned unchanged if it
    does not begin with start """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1244
1245
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end; s is returned unchanged if it
    does not finish with end """
    if s.endswith(end):
        # Slice by explicit length: s[:-len(end)] would turn into s[:0]
        # (an empty string) for an empty suffix
        return s[:len(s) - len(end)]
    return s
1250
1251
def url_basename(url):
    """ Return the last segment of the URL's path, ignoring leading and
    trailing slashes """
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
1255
1256
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is reported as HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1260
1261
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, multiplied by invscale and floor-divided by
    scale; return default when v is None or an empty string.

    If get_attr is given, the value is first replaced by that attribute
    of v (None if the attribute is missing).
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1269
9572013d 1270
40a90862
JMF
def str_or_none(v, default=None):
    """ Convert v with compat_str, or return default if v is None """
    if v is None:
        return default
    return compat_str(v)
1273
9732d77e
PH
1274
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Commas, dots and plus signs are dropped before the conversion
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
1281
1282
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, multiplied by invscale and divided by
    scale; return default when v is None """
    if v is None:
        return default
    return float(v) * invscale / scale
43f775e4
PH
1285
1286
def parse_duration(s):
    """ Parse a duration string into a number of seconds.

    The pattern accepts, case-insensitively and after an optional
    'T'/'PT' prefix: a (possibly fractional) number of minutes or hours
    on its own, an hours + minutes pair, or optional days, hours and
    minutes followed by seconds with an optional fractional part.
    Separators are ':' or unit names.

    Returns None if s is not a string or does not match; otherwise an
    int or float number of seconds.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None
    res = 0
    # The "only" forms stand alone and are converted directly
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    # Otherwise add up whichever components matched
    if m.group('secs'):
        res += int(m.group('secs'))
    if m.group('mins_reversed'):
        res += int(m.group('mins_reversed')) * 60
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('hours_reversed'):
        res += int(m.group('hours_reversed')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    # The 'ms' group holds the fractional part including its leading dot,
    # so it is added as a fraction of a second
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1331
1332
def prepend_extension(filename, ext):
    """ Insert ext in front of the filename's own extension, e.g.
    ('video.mp4', 'temp') gives 'video.temp.mp4' """
    stem, original_ext = os.path.splitext(filename)
    parts = (stem, ext, original_ext)
    return '{0}.{1}{2}'.format(*parts)
d70ad093
PH
1336
1337
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        # The binary could not be started
        return False
    return exe
b7ab0590
PH
1346
1347
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        process = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = process.communicate()[0]
    except OSError:
        return False
    # Byte output is decoded leniently before it is searched
    if isinstance(output, bytes):
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1361
1362
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version from program output.

    Returns the first group of version_re (by default whatever follows
    the word 'version'), or unrecognized if the pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1372
1373
class PagedList(object):
    """ Base for paged lists; relies on a getslice() method that this
    class itself does not define """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1378
9c44d242
PH
1379
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one at a time through
    pagefunc(pagenum) until a slice request is satisfied """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list;
        end=None means "until the pages run out" """
        res = []
        # Begin with the page that contains index start
        for pagenum in itertools.count(start // self._pagesize):
            # Indices of the first entry of this page and of the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of start within this page, when start lies in it
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry, when end lies in
            # (or right at the end of) this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1421
1422
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list whose number of pages is known up front """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list;
        end=None means "through the last page" """
        # Page holding index start, and how many of its entries to skip
        first_page, pending_skip = divmod(start, self._pagesize)
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start

        collected = []
        for pagenum in range(first_page, stop_page):
            entries = list(self._pagefunc(pagenum))
            if pending_skip:
                entries = entries[pending_skip:]
                pending_skip = None
            if remaining is not None:
                if len(entries) >= remaining:
                    # This page completes the requested slice
                    collected.extend(entries[:remaining])
                    break
                remaining -= len(entries)
            collected.extend(entries)
        return collected
1450
1451
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape sequence (eight hex digits) in s
    by the character it denotes """
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, length); only the text is wanted
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
b53466e1 1458
d05cfe06
S
1459
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 text is UTF-8 encoded before it is quoted
    must_encode = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if must_encode:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1465
1466
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only these four components are escaped; the rest is left untouched
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
1476
# struct_pack / struct_unpack: struct.pack / struct.unpack that also accept
# a text format spec on interpreters where struct insists on bytes
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def _ascii_spec(spec):
        return spec.encode('ascii') if isinstance(spec, compat_str) else spec

    def struct_pack(spec, *args):
        return struct.pack(_ascii_spec(spec), *args)

    def struct_unpack(spec, *args):
        return struct.unpack(_ascii_spec(spec), *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1493
1494
def read_batch_urls(batch_fd):
    """ Read URLs, one per line, from batch_fd and close it afterwards.

    Lines may be bytes (decoded as UTF-8) or text. A leading byte order
    mark is removed, surrounding whitespace is stripped, and empty lines
    as well as lines starting with '#', ';' or ']' are skipped.
    """
    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        # '\ufeff' is a properly decoded BOM; '\xef\xbb\xbf' is what the
        # UTF-8 BOM bytes look like when decoded one character per byte
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1509
1510
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1513
1514
0990305d
PH
# etree_iter(node): iterate over an element tree, using Element.iter where
# it exists and a findall() fallback otherwise
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1519
1520
bcf89ce6
PH
def parse_xml(s):
    """ Parse the XML text s into an element tree, ignoring doctype
    declarations, and return the root element """
    class DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=DoctypeIgnoringBuilder())
    xml_kwargs = {}
    if sys.version_info >= (2, 7):
        xml_kwargs['parser'] = parser
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **xml_kwargs)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return tree
e68301af
PH
1536
1537
a1a530b0
PH
# Age limit associated with each US rating name
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """ Parse an age limit given as a one or two digit number with an
    optional trailing '+', or as a US rating name.

    Returns the age as an int, or None if s is None or not recognised.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1552
1553
def strip_jsonp(code):
    """ Strip a JSONP wrapper 'callback(...)', with optional trailing
    semicolon and // comments, and return its argument; text that does
    not look like JSONP is returned unchanged """
    jsonp_wrapper = re.compile(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$')
    return jsonp_wrapper.sub(r'\1', code)
478c2c61
PH
1557
1558
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal into JSON text.

    Single-quoted strings and bare identifiers become double-quoted
    strings (true, false and null are kept), and a comma directly in
    front of a closing ']' is removed.
    """
    # Rewrites applied inside a single-quoted string once its quotes are
    # stripped: keep escaped backslashes, unescape \', escape "
    requoting = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: requoting[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', r'\1', converted)
1582
1583
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position in quality_ids is the quality; unknown ids get -1
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1592
acd69589
PH
1593
# Default output template: "<title>-<id>.<ext>", in %-style mapping syntax
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1595
a020a0dc
PH
1596
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Cut so that the text plus the ellipses is exactly length long
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1605
1606
def version_tuple(v):
    """ Split a version string on '.' and '-' into a tuple of ints """
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
1609
1610
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    If version is empty, or either version cannot be parsed, the answer
    is "not assume_new".
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
732ea2f0
PH
1618
1619
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updating is possible when running from a zip file or a frozen build
    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return loaded_from_zip or hasattr(sys, 'frozen')
7d4111ed
PH
1625
1626
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
1630
1631
c460bdd5
PH
def mimetype2ext(mt):
    """ Map a MIME type to a file extension.

    The part after the last '/' is used, with a few known subtypes
    translated. Returns None if mt is None, e.g. because a response
    carried no Content-Type header.
    """
    if mt is None:
        return None

    _, _, res = mt.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)
1639
1640
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess a file extension from the headers of a response: first from
    the filename in Content-Disposition, otherwise from Content-Type """
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(h):
            return url_handle.headers[h]

    disposition = getheader('Content-Disposition')
    filename_match = disposition and re.match(
        r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
    if filename_match:
        ext = determine_ext(filename_match.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1657
1658
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1667
1668
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks come before the UTF-16 ones because the UTF-16-LE
    # mark is a prefix of the UTF-32-LE mark
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    detected = next(
        ((bom, enc) for bom, enc in BOMS if first_bytes.startswith(bom)),
        None)
    if detected is None:
        s = first_bytes.decode('utf-8', 'replace')
    else:
        bom, enc = detected
        s = first_bytes[len(bom):].decode(enc, 'replace')

    # HTML is assumed when the first non-whitespace character is '<'
    return re.match(r'^\s*<', s)
a055469f
PH
1687
1688
def determine_protocol(info_dict):
    """ Return the download protocol for info_dict: its explicit
    'protocol' if set, otherwise a guess based on its 'url' """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Streaming schemes are recognised by prefix
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    # Manifest formats are recognised by extension
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1709
1710
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Width of the widest cell in every column
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    # All columns but the last are left-aligned and padded
    padded = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded) + '%s'
    return '\n'.join(row_format % tuple(row) for row in rows)
347de493
PH
1717
1718
def _match_one(filter_part, dct):
    """ Evaluate a single filter expression against the dictionary dct.

    Two forms are understood: a comparison "key OP value" (with an
    optional '?' after the operator) and a unary test "key" / "!key".
    Raises ValueError for anything else, for a string value combined with
    an ordering operator, and for a numeric value that cannot be parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            # Strings can only be tested for (in)equality
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            # Plain integer first, then a file size, then a file size
            # with 'B' appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # A missing value passes only if the '?' marker was given; the
            # matched group (or None) is returned and used for its
            # truthiness
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
1776
1777
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # Every '&'-separated part has to pass
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
1783
1784
def match_filter_func(filter_str):
    """ Build a function that takes an info dict and returns None if it
    passes filter_str, or a message explaining why it is skipped """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
1793
1794
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ Proxy handler that lets an individual request choose its proxy
    through a 'Ytdl-request-proxy' header """

    def __init__(self, proxies=None):
        # Set default handlers
        # http_open/https_open are installed with a '__noproxy__' default;
        # type and meth are bound as defaults so that each lambda keeps the
        # values of its own loop iteration
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy named in the request header overrides the given one; the
        # header is then removed from the request
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)