]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[utils] Check ext with trailing slash against the list of known extensions
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
912b38b4 7import calendar
676eb3f2 8import codecs
62e609ab 9import contextlib
e3946f98 10import ctypes
c496ca96
PH
11import datetime
12import email.utils
f45c185f 13import errno
be4a824d 14import functools
d77c3dfd 15import gzip
b7ab0590 16import itertools
03f9daab 17import io
f4bfd65f 18import json
d77c3dfd 19import locale
02dbf93f 20import math
347de493 21import operator
d77c3dfd 22import os
4eb7f1d1 23import pipes
c496ca96 24import platform
d77c3dfd 25import re
13ebea79 26import ssl
c496ca96 27import socket
b53466e1 28import struct
1c088fa8 29import subprocess
d77c3dfd 30import sys
181c8655 31import tempfile
01951dda 32import traceback
bcf89ce6 33import xml.etree.ElementTree
d77c3dfd 34import zlib
d77c3dfd 35
8c25f81b 36from .compat import (
8f9312c3 37 compat_basestring,
8c25f81b 38 compat_chr,
36e6f62c 39 compat_etree_fromstring,
8c25f81b 40 compat_html_entities,
be4a824d 41 compat_http_client,
c86b6142 42 compat_kwargs,
8c25f81b 43 compat_parse_qs,
be4a824d 44 compat_socket_create_connection,
8c25f81b
PH
45 compat_str,
46 compat_urllib_error,
47 compat_urllib_parse,
48 compat_urllib_parse_urlparse,
49 compat_urllib_request,
50 compat_urlparse,
7d4111ed 51 shlex_quote,
8c25f81b 52)
4644ac55
S
53
54
468e2e92
FV
# The type of a compiled regular expression has no dedicated public name
# here, so it is derived from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP headers; YoutubeDLHandler.http_request adds each of them to a
# request that does not already carry it.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Sentinel for "no default supplied"; lets callers pass None as a real default.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
72
73
def preferredencoding():
    """Return the text encoding to use on this system.

    The value reported by ``locale.getpreferredencoding()`` is used if a
    short probe string can actually be encoded with it; if looking it up or
    encoding the probe fails for any reason, ``'UTF-8'`` is returned instead.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable encoding raises here.
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
d77c3dfd 87
f4bfd65f 88
def write_json_file(obj, fn):
    """Encode obj as JSON and write it to fn, atomically if possible.

    The JSON is written to a temporary file created in the same directory as
    fn (named after fn with a ``.tmp`` suffix) and then renamed onto fn.
    If anything fails, the temporary file is removed (best effort) and the
    original exception is re-raised.
    """
    fn = encodeFilename(fn)

    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is given
        # unicode objects, so decode the pieces.
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # Python 2's json.dump expects a byte stream, Python 3's a character
    # stream.
    if sys.version_info >= (3, 0):
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })
    else:
        args['mode'] = 'wb'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # On Windows os.rename raises WindowsError or FileExistsError
            # when the target exists, so remove it first.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; ignore cleanup failures.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
141
142
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] or xpath[@key='val'].

        With val left as None only the presence of the attribute is
        required. Returns whatever ``node.find`` returns (None if nothing
        matches).
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element under xpath that has attribute key (== val).

        Python 2.6 variant that filters the candidates by hand.
        """
        # Here comes the crazy part: in 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for candidate in node.findall(xpath):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
164
d7e66d39
JMF
165# On python2.6 the xml.etree.ElementTree.Element methods don't support
166# the namespace parameter
5f6a1245
JW
167
168
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand ``prefix:tag`` steps of path into ``{namespace}tag`` form.

    Each '/'-separated step containing a single ':' has its prefix looked up
    in ns_map; steps without ':' are kept unchanged.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
179
d77c3dfd 180
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    xpath is either a single XPath string or an iterable of XPath strings
    that are tried in order until one matches.

    If nothing matches: default is returned when one was supplied; otherwise
    ExtractorError is raised when fatal is true (the message uses name, or
    xpath if name is None); otherwise None is returned.
    """
    def _find_xpath(xpath):
        if sys.version_info < (2, 7):  # Crazy 2.6
            xpath = xpath.encode('ascii')
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable of xpaths is
        # handled like a miss instead of leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
204
205
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element that xpath_element() finds.

    A missing element is handled by xpath_element(). If the element exists
    but has no text: default is returned when one was supplied; otherwise
    ExtractorError is raised when fatal is true; otherwise None is returned.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element() hands back None or the default on a miss; pass it on.
    if element is None or element == default:
        return element

    if element.text is not None:
        return element.text

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
219
220
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath[@key].

    If no such element exists: default is returned when one was supplied;
    otherwise ExtractorError is raised when fatal is true; otherwise None
    is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
232
233
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id.

    Thin wrapper around get_element_by_attribute() for the ``id`` attribute.
    """
    return get_element_by_attribute('id', id, html)
237
12ea2f30 238
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag in html with attribute == value.

    The tag is located with a regular expression. Returns None when no tag
    matches; otherwise the tag content passed through unescapeHTML(), with
    one character stripped from each end if the content starts with a quote.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    match = re.search(pattern, html)
    if match is None:
        return None

    content = match.group('content')
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 260
9e6dd238
FV
261
def clean_html(html):
    """Clean an HTML snippet into a readable string.

    None is passed through unchanged (convenient when sanitizing optional
    descriptions). Otherwise newlines become spaces, ``<br>`` tags and
    ``</p><p>`` boundaries become newlines, all remaining tags are dropped,
    HTML entities are decoded and the result is stripped.
    """
    if html is None:
        return html

    # Literal newlines are just whitespace in HTML ...
    text = html.replace('\n', ' ')
    # ... whereas <br> and paragraph boundaries are real line breaks.
    text = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop every remaining tag, then decode entities.
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
9e6dd238
FV
277
278
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The name '-' selects standard output (its binary buffer when available).
    If opening fails with anything but EACCES, the name is passed through
    sanitize_path() and, if that changed it, opened once more; an error from
    that second attempt propagates to the caller.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            # Sanitizing changed nothing, so retrying cannot help.
            raise

        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
309
310
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp.

    Returns None when email.utils.parsedate_tz() cannot parse the string.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 318
5f6a1245 319
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so it could be used as part of a filename.

    If restricted is set, a stricter subset of characters is allowed.
    Set is_id if s is not an arbitrary string but an ID that should be kept
    if possible: the per-character replacement still happens, but the
    cosmetic clean-up afterwards is skipped.
    """
    def replace_insane(char):
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps: keep 12:34:56 readable as 12_34_56
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends.
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 356
5f6a1245 357
a2aaf4db
S
def sanitize_path(s):
    """Sanitize and normalize a path on Windows.

    On every other platform s is returned unchanged. On Windows the path is
    normalized and, in every component except '.' and '..', each of the
    characters / < > : " | \\ ? * as well as a trailing whitespace character
    or dot is replaced with '#'. A drive or UNC prefix is kept as is.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component in front of the leading separator.
        norm_path.pop(0)
    # Raw string: the pattern is the same as before, but no longer relies on
    # the invalid string escape '\s' being passed through unchanged.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
374
375
def orderedSet(iterable):
    """Return a list of the items of iterable without duplicates.

    The first occurrence of each item is kept and the original order is
    preserved. Items are compared by equality, so they need not be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 383
912b38b4 384
4e408e47
PH
def _htmlentity_transform(entity):
    """Transform an HTML entity (given without '&' and ';') to a character.

    Named entities are looked up in compat_html_entities.name2codepoint;
    numeric ones (``#123`` or ``#x7b``) are converted directly. Anything
    that cannot be resolved is returned in its literal ``&...;`` form.
    """
    # Known non-numeric HTML entity
    name2codepoint = compat_html_entities.name2codepoint
    if entity in name2codepoint:
        return compat_chr(name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            # Turn 'x7b' into '0x7b' so that int() accepts it in base 16.
            base, numstr = 16, '0%s' % numstr
        else:
            base = 10
        # The conversion can fail with ValueError, see
        # https://github.com/rg3/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
407
408
def unescapeHTML(s):
    """Replace every ``&...;`` entity in s with the character it stands for.

    None is passed through. Entities are resolved by
    _htmlentity_transform().
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', _replace, s)
d77c3dfd 416
8bf48f23 417
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding used for data exchanged with subprocesses.

    On Windows (major version >= 5) this is preferredencoding(); elsewhere
    it is the file system encoding, or 'utf-8' if that is None.
    """
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()

    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
428
429
def encodeFilename(s, for_subprocess=False):
    """Return the file name s in the form the OS-level APIs expect.

    @param s The name of the file
    @param for_subprocess If true, s is also encoded on Windows (Python 2)

    On Python 3 s is returned unchanged; on Python 2 it is encoded with
    get_subprocess_encoding(), ignoring unencodable characters.
    """
    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    use_unicode_api = (
        not for_subprocess and
        sys.platform == 'win32' and
        sys.getwindowsversion()[0] >= 5)
    if use_unicode_api:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
448
449
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): decode a byte-string file name.

    On Python 3, and for anything that is not a bytes object, b is returned
    unchanged. Otherwise it is decoded with get_subprocess_encoding(),
    ignoring undecodable bytes.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 459
f07b74fc
PH
460
def encodeArgument(s):
    """Encode a command line argument for use in a subprocess call.

    Values that are not compat_str are first decoded as ASCII; the result is
    then passed to encodeFilename() with for_subprocess=True.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
468
469
aa49acd1
S
def decodeArgument(b):
    """Decode a subprocess argument via decodeFilename(b, for_subprocess=True)."""
    return decodeFilename(b, for_subprocess=True)
472
473
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as text.

    None is passed through; bytes are decoded with preferredencoding().
    The result is asserted to be a compat_str.
    """
    if optval is None:
        return optval

    text = optval
    if isinstance(text, bytes):
        text = text.decode(preferredencoding())

    assert isinstance(text, compat_str)
    return text
1c256f70 482
5f6a1245 483
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as ``H:MM:SS``, ``M:SS`` or ``S``.

    The shortest form that fits is used: durations of an hour or more
    include the hours, durations of a minute or more include the minutes.
    The boundaries are inclusive, so exactly 3600 gives '1:00:00' (not
    '60:00') and exactly 60 gives '1:00' (not '60').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    if secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    return '%d' % secs
491
a0ddb8a2 492
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    params is the options dict; its 'nocheckcertificate' entry (default
    False) turns certificate verification off. Extra keyword arguments are
    forwarded to the handler.
    """
    no_check_certificate = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if no_check_certificate:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 516
732ea2f0 517
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages asking for a bug report.

    The update hint in it depends on ytdl_is_updateable().
    """
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join([
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ])
527
528
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """Build the error message and record the error context.

        tb, if given, is the original traceback (so that it can be printed
        out). If expected is set, this is a normal error message and most
        likely not a bug in youtube-dl. cause, if given, is mentioned in the
        message; video_id, if given, is prepended to it.
        """
        # Network trouble and unavailable videos are never youtube-dl bugs,
        # so an error raised while handling one of them counts as expected.
        currently_handled = sys.exc_info()[0]
        if currently_handled in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None."""
        if self.traceback is None:
            return None
        formatted_lines = traceback.format_tb(self.traceback)
        return ''.join(formatted_lines)
01951dda 556
1c256f70 557
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected ExtractorError for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        # Keep the offending URL available to callers.
        self.url = url
563
564
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match.

    Adds nothing to ExtractorError besides a distinct type.
    """
568
569
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """Store msg as the exception message and remember exc_info.

        exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info()).
        """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
582
583
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
591
592
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        """Create the error; msg is kept both as message and as self.msg."""
        # Initialize the base class too, so that str(exc) and exc.args carry
        # the message instead of being empty.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 602
5f6a1245 603
class MaxDownloadsReached(Exception):
    """Signal that the --max-downloads limit has been reached."""
d77c3dfd
FV
607
608
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
616
617
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """Record the sizes involved; both are in bytes."""
        # Give the exception a message as well, so that str(exc) and
        # tracebacks are informative instead of empty.
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 630
5f6a1245 631
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and bind it to the configured source address.

    The address is read from ydl_handler._params['source_address']; when it
    is unset the new connection object is returned untouched. is_https only
    matters for the Python 2.6 fallback, which wraps the socket with TLS.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/rg3/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs[b'strict'] = True

    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')
    if source_address is None:
        return hc

    sa = (source_address, 0)
    if hasattr(hc, 'source_address'):  # Python 2.7+
        hc.source_address = sa
        return hc

    # Python 2.6: no source_address attribute, so replace connect() with a
    # version that opens the socket from the requested address.
    def _hc_connect(self, *args, **kwargs):
        sock = compat_socket_create_connection(
            (self.host, self.port), self.timeout, sa)
        if is_https:
            self.sock = ssl.wrap_socket(
                sock, self.key_file, self.cert_file,
                ssl_version=ssl.PROTOCOL_TLSv1)
        else:
            self.sock = sock

    hc.connect = functools.partial(_hc_connect, hc)
    return hc
657
658
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Initialize the base handler and keep params for later use."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req over a connection made by _create_http_connection()."""
        connection_factory = functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, first as a raw stream, then with header."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, with or without ctor support for code."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        ret = addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Escape the request URL and add the standard headers to req."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            if req.get_method() == 'HEAD':
                req_type = HEADRequest
            else:
                req_type = compat_urllib_request.Request
            new_req = req_type(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            new_req.timeout = req.timeout
            req = new_req

        for header, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if header.capitalize() not in req.headers:
                req.add_header(header, value)

        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress the response body and escape a redirect Location."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off.
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same treatment.
    https_request = http_request
    https_response = http_response
bf50b038 782
5de90176 783
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection()."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Initialize the base handler.

        params is stored for later use; https_conn_class defaults to
        compat_http_client.HTTPSConnection.
        """
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        if not https_conn_class:
            https_conn_class = compat_http_client.HTTPSConnection
        self._https_conn_class = https_conn_class
        self._params = params

    def https_open(self, req):
        """Open req, forwarding the SSL settings the base handler stored."""
        extra = {}
        if hasattr(self, '_context'):  # python > 2.6
            extra['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra['check_hostname'] = self._check_hostname

        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **extra)
be4a824d
PH
799
800
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that currently just delegates to HTTPCookieProcessor."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Hand the response to HTTPCookieProcessor.http_response()."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is disabled (commented out).
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
823
824
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given date.

    date_str has the form YYYY-MM-DD<delimiter>HH:MM:SS, optionally followed
    by fractional seconds (dropped) and by 'Z' or a +HH:MM / -HHMM offset.
    timezone, if given, is a datetime.timedelta used as the UTC offset; the
    string is then not searched for an offset. Returns None when date_str
    is None or does not match the expected format.
    """
    if date_str is None:
        return None

    # Fractional seconds are not supported by the format below.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        # No offset found (or a plain 'Z') means UTC.
        timezone = datetime.timedelta()
        tz_match = re.search(
            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if tz_match:
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                sign = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=sign * int(tz_match.group('hours')),
                    minutes=sign * int(tz_match.group('minutes')))

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_dt.timetuple())
    except ValueError:
        return None
912b38b4
PH
854
855
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD.

    date_str is tried against a list of known date formats; day_first
    decides whether ambiguous numeric dates are read as day/month or as
    month/day. If none of them matches, email.utils.parsedate_tz() is tried
    as a last resort. Returns None if date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        format_expressions += [
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ]
    else:
        format_expressions += [
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ]

    upload_date = None
    # Every format is tried; the last one that parses determines the result.
    for expression in format_expressions:
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        upload_date = parsed.strftime('%Y%m%d')

    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 920
5f6a1245 921
def determine_ext(url, default_ext='unknown_video'):
    """ Guess the file extension of url.

    Whatever follows the last dot (ignoring the query string) is returned if
    it is purely alphanumeric, or if it is a known media extension followed
    by slashes. Otherwise (or if url is None) default_ext is returned. """
    if url is None:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    known_extensions = (
        'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
        'flv', 'f4v', 'f4a', 'f4b',
        'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
        'mkv', 'mka', 'mk3d',
        'avi', 'divx',
        'mov',
        'asf', 'wmv', 'wma',
        '3gp', '3g2',
        'mp3',
        'flac',
        'ape',
        'wav',
        'f4f', 'f4m', 'm3u8', 'smil')
    # Tolerate URLs like http://host/video.mp4/?download
    slashless = candidate.rstrip('/')
    if slashless in known_extensions:
        return slashless
    return default_ext
73e79f2a 945
5f6a1245 946
def subtitles_filename(filename, sub_lang, sub_format):
    """ Replace the extension of filename with '<sub_lang>.<sub_format>' """
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 949
5f6a1245 950
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, "%Y%m%d").date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')
    # timedelta knows neither months nor years, so approximate both in days
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
978
979
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    anything else is returned unchanged"""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        return date_str
    return '-'.join(mobj.groups())
988
5f6a1245 989
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound leaves that side of the range open"""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
1019
1020
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
1029
1030
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps the file descriptors of stdout/stderr to the ids GetStdHandle
    # expects for them
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    kernel32 = ctypes.windll.kernel32

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", kernel32))
    console = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(
        ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(
        (b"GetFileType", kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        file_type = GetFileType(handle) & ~FILE_TYPE_REMOTE
        return (file_type != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(console):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text)
        for pos, char in enumerate(text):
            if ord(char) > 0xffff:
                return pos
        return len(text)

    while s:
        # Write at most 1024 BMP characters at a time; a count of 0 means
        # the string starts with a non-BMP character, which takes up two
        # UTF-16 code units
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            console, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
1104
1105
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it.

    encoding overrides the encoding used when out has to be fed bytes. """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    wants_bytes = (
        'b' in getattr(out, 'mode', '') or
        sys.version_info[0] < 3)  # Python 2 lies about mode of sys.stderr
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream on top of a binary buffer: encode ourselves so that
        # unencodable characters get dropped
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
1126
1127
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string to the list of its byte values """
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3, but 1-char strings on Python 2
    if isinstance(bs[0], int):
        return list(bs)
    return [ord(c) for c in bs]
1135
c257baff 1136
def intlist_to_bytes(xs):
    """ Convert a list of byte values to a byte string """
    return struct_pack('%dB' % len(xs), *xs) if xs else b''
c38b1e77
PH
1141
1142
c1c9a79c
PH
# Cross-platform file locking: both branches define _lock_file(f, exclusive)
# and _unlock_file(f)
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed when (un)locking
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file needs it again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
        fcntl.flock(f, operation)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1206
1207
class locked_file(object):
    """ Context manager around a file that is locked while the context is
    active; the lock is exclusive unless the file is opened with mode 'r' """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Don't leak the file if it could not be locked
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1237
1238
4644ac55
S
def get_filesystem_encoding():
    """ Returns the filesystem encoding, or 'utf-8' if it is not known """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1242
1243
def shell_quote(args):
    """ Returns args as a single string, each argument quoted for a POSIX
    shell and separated by spaces.

    Arguments given as bytes are decoded with the filesystem encoding. """
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote instead of pipes.quote: the pipes module is deprecated
        # and no longer exists in recent Python versions
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1253
1254
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # data travels JSON-encoded in the fragment of the URL
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1261
1262
def unsmuggle_url(smug_url, default=None):
    """ Split a URL made by smuggle_url into (url, data).

    URLs without smuggled data yield (smug_url, default). """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f
PH
1270
1271
02dbf93f
PH
def format_bytes(bytes):
    """ Format a number of bytes as a string with a binary unit suffix,
    e.g. '1.00KiB'.

    bytes may be a number or a str holding a number; None gives 'N/A'. """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Stay within the suffix table: tiny fractions would otherwise give
        # a negative index (and thus the wrong suffix), huge values an
        # IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 1284
1c088fa8 1285
be64b5b0
PH
def parse_filesize(s):
    """ Parse a file size such as '5 MiB' or '1,24 KB' into a number of
    bytes (int); returns None if s is None or not recognized """
    if s is None:
        return None

    # '<X>iB' is binary and '<X>B' decimal. The forms with a lower-case
    # letter are of course incorrect and unofficial, but we support those
    # too: '<x>B' counts as binary, '<X>b' as decimal.
    _UNIT_TABLE = {'B': 1, 'b': 1}
    for power, prefix in enumerate('KMGTPEZY', 1):
        binary, decimal = 1024 ** power, 1000 ** power
        _UNIT_TABLE[prefix + 'iB'] = binary
        _UNIT_TABLE[prefix + 'B'] = decimal
        _UNIT_TABLE[prefix.lower() + 'B'] = binary
        _UNIT_TABLE[prefix + 'b'] = decimal

    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if not m:
        return None

    # Accept a comma as decimal mark
    number = float(m.group('num').replace(',', '.'))
    return int(number * _UNIT_TABLE[m.group('unit')])
be64b5b0
PH
1338
1339
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """

    if name not in ENGLISH_MONTH_NAMES:
        return None
    return ENGLISH_MONTH_NAMES.index(name) + 1
1347
1348
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    # The abbreviation is the first three letters of the full name
    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
1357
1358
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, except those that already
    start an entity or a character reference"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
1365
1366
def setproctitle(title):
    """ Set the name of the current process to title via prctl from
    libc.so.6; silently does nothing if that is not available """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title, so that the string handed to prctl is NUL-terminated
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1380
1381
def remove_start(s, start):
    """ Returns s without the prefix start (if it has that prefix) """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1386
1387
2b9faf55
PH
def remove_end(s, end):
    """ Returns s without the suffix end (if it has that suffix) """
    # An empty end has to be skipped: s[:-0] is the empty string, not s
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1392
1393
def url_basename(url):
    """ Returns the last component of the path of url """
    path = compat_urlparse.urlparse(url).path
    components = path.strip('/').split('/')
    return components[-1]
aa94a6d3
PH
1397
1398
class HEADRequest(compat_urllib_request.Request):
    """ Request whose get_method always reports HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1402
1403
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, returning default if that is not possible.

    If get_attr is given, the attribute of that name is read from v first.
    The result is multiplied by invscale and floor-divided by scale.
    None, '' and values int() does not accept give default. """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        # TypeError: v is of a type int() does not take at all (list, dict, ...)
        return default
9732d77e 1416
9572013d 1417
40a90862
JMF
def str_or_none(v, default=None):
    """ Convert v to a compat_str, or return default if v is None """
    if v is None:
        return default
    return compat_str(v)
1420
9732d77e
PH
1421
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Drop thousands separators and plus signs
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
1428
1429
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, returning default if that is not possible.

    The result is multiplied by invscale and divided by scale.
    None and values float() does not accept give default. """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # TypeError: v is of a type float() does not take at all (list, ...)
        return default
43f775e4
PH
1437
1438
def parse_duration(s):
    """ Parse a duration such as '1:23:45', '3 min' or 'PT1H2M3.5S' and
    return it in seconds; returns None if s is not a string or is not
    recognized """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None

    # A lone (possibly fractional) number of minutes or hours
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)

    seconds_per_unit = (
        ('secs', 1),
        ('mins_reversed', 60),
        ('mins', 60),
        ('hours', 60 * 60),
        ('hours_reversed', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    res = 0
    for group_name, factor in seconds_per_unit:
        value = m.group(group_name)
        if value:
            res += int(value) * factor
    # The fractional part is added last, it turns the result into a float
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1483
1484
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the extension of filename.

    If expected_real_ext is given and filename has a different extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
d70ad093
PH
1491
1492
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the extension of filename with ext.

    If expected_real_ext is given and filename has a different extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        base = name
    else:
        base = filename
    return '{0}.{1}'.format(base, ext)
1498
1499
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        # The binary could not be started
        return False
    return exe
b7ab0590
PH
1508
1509
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1523
1524
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version from the output of a program.

    version_re must have the version as its first group; by default the
    token after the word 'version' is used. Returns unrecognized if the
    expression does not match. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1534
1535
class PagedList(object):
    """ Base class of the paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
1540
9c44d242
PH
1541
class OnDemandPagedList(PagedList):
    """ Paged list that calls pagefunc(pagenum) for one page at a time until
    the requested slice is complete or a page comes back short """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        collected = []
        pagesize = self._pagesize
        for pagenum in itertools.count(start // pagesize):
            firstid = pagenum * pagesize
            nextfirstid = firstid + pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry within this page
            if firstid <= start < nextfirstid:
                startv = start % pagesize
            else:
                startv = 0

            # Offset just past the last wanted entry, if it is in this page
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            collected.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return collected
81c2f20b
PH
1583
1584
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known beforehand """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        collected = []
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the beginning of the first page
        skip_elems = start - first_page * self._pagesize
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
1612
1613
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape in s with the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', unescape, s)
0fe2ff78
YCH
1620
1621
def lowercase_escape(s):
    """ Replace every \\uXXXX escape in s with the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', unescape, s)
b53466e1 1628
d05cfe06
S
1629
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, text is turned into UTF-8 bytes before quoting
    is_py2_text = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if is_py2_text:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1635
1636
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Scheme and network location are left untouched
    escaped = parts._replace(
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    )
    return escaped.geturl()
1646
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument,
    # so text format specs are encoded before they are passed on
    def struct_pack(spec, *args):
        encoded_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(encoded_spec, *args)

    def struct_unpack(spec, *args):
        encoded_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(encoded_spec, *args)
else:
    # struct takes text format specs, no wrapping needed
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1663
1664
def read_batch_urls(batch_fd):
    """ Read the URLs from the file-like object batch_fd (one per line) and
    close it.

    Lines may be text or UTF-8 encoded bytes. Blank lines and lines starting
    with '#', ';' or ']' are skipped. Returns a list of URLs. """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A byte order mark shows up as '\ufeff' when the data was decoded
        # as UTF-8, and as '\xef\xbb\xbf' when its three bytes were decoded
        # one by one (e.g. as latin-1)
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1679
1680
def urlencode_postdata(*args, **kargs):
    """ Like urlencode, but returns ASCII encoded bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1683
1684
def encode_dict(d, encoding='utf-8'):
    """ Returns a copy of d with all string keys and values encoded """
    def encode(v):
        if isinstance(v, compat_basestring):
            return v.encode(encoding)
        return v

    encoded = {}
    for key, value in d.items():
        encoded[encode(key)] = encode(value)
    return encoded
16392824
S
1689
1690
a1a530b0
PH
# Age limits for the US movie ratings
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """ Returns the age limit for s, which is either a number of years
    (optionally followed by '+') or a key of US_RATINGS; None if s is None
    or not recognized """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1705
1706
def strip_jsonp(code):
    """ Strip the function call of a JSONP response (and anything trailing
    it, like a semicolon or // comments), leaving only its argument """
    jsonp_call = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_call, r'\1', code)
478c2c61
PH
1710
1711
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal to JSON: single-quoted strings
    and bare identifiers become double-quoted strings, and commas directly
    before a closing bracket or brace are dropped """
    # Rewrites of the escapes in a single-quoted string
    single_quoted_fixes = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith('"'):
            # Only \' has to go, it is not a valid escape in JSON
            token = re.sub(r"\\'", "'", token[1:-1])
        elif token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: single_quoted_fixes[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Trailing commas are not allowed in JSON
    return re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
1735
1736
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position in quality_ids is the quality; unknown ids get -1
        try:
            position = quality_ids.index(qid)
        except ValueError:
            return -1
        return position
    return q
1745
acd69589
PH
1746
# Default template for output filenames, filled in with %-formatting
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1748
a020a0dc
PH
1749
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    # Leave room for the ellipses within length
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1758
1759
def version_tuple(v):
    """ Convert a version like '2015.01.23' or '10.1-6' to a tuple of ints """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
1762
1763
def is_outdated_version(version, limit, assume_new=True):
    """ Returns whether version is older than limit.

    A missing version, or one that cannot be parsed, counts as new if
    assume_new is true, and as outdated otherwise. """
    outdated_if_unknown = not assume_new
    if not version:
        return outdated_if_unknown
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return outdated_if_unknown
732ea2f0
PH
1771
1772
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when loaded from a zip file, or when sys has a 'frozen' attribute
    runs_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return runs_from_zip or hasattr(sys, 'frozen')
7d4111ed
PH
1778
1779
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
1783
1784
c460bdd5
PH
def mimetype2ext(mt):
    """ Returns the file extension for the MIME type mt: its subtype, unless
    a different extension is known for that subtype.

    Returns None if mt is None (e.g. a missing Content-Type header). """
    if mt is None:
        return None

    _, _, res = mt.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
        'ttml+xml': 'ttml',
    }.get(res, res)
1793
1794
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension from the headers of url_handle: from the
    filename in Content-Disposition if possible, else from Content-Type """
    try:
        url_handle.headers

        def getheader(h):
            return url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    disposition = getheader('Content-Disposition')
    if disposition:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1811
1812
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Returns a base64 data: URI for the bytes data """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
1815
1816
05900629
PH
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1825
1826
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks have to come before the UTF-16 ones: the UTF-32-LE
    # mark starts with the UTF-16-LE one
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if text is None:
        # No byte order mark
        text = first_bytes.decode('utf-8', 'replace')

    # HTML is assumed if the first thing after whitespace is a '<'
    return re.match(r'^\s*<', text)
a055469f
PH
1845
1846
def determine_protocol(info_dict):
    """ Returns the protocol of info_dict: its 'protocol' entry if set,
    otherwise a guess based on its 'url' """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    # Manifests are recognized by their extension
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1867
1868
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Every column but the last is padded to its widest value plus a space
    max_lens = []
    for col in zip(*table):
        max_lens.append(max(len(compat_str(v)) for v in col))
    cell_formats = ['%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]]
    format_str = ' '.join(cell_formats) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)
347de493
PH
1875
1876
def _match_one(filter_part, dct):
    """ Evaluate a single filter expression against dct.

    filter_part is either a comparison ('key OP value', where a '?' after
    the operator lets a missing key pass) or a presence test ('key' or
    '!key'). Raises ValueError if filter_part is invalid. """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op_name = m.group('op')
        op = COMPARISON_OPERATORS[op_name]
        strval = m.group('strval')
        if strval is not None:
            if op_name not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % op_name)
            comparison_value = strval
        else:
            intval = m.group('intval')
            try:
                comparison_value = int(intval)
            except ValueError:
                # Not a plain integer: try it as a file size, with and
                # without an implied trailing 'B'
                comparison_value = parse_filesize(intval)
                if comparison_value is None:
                    comparison_value = parse_filesize(intval + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            intval, filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        presence_test = UNARY_OPERATORS[m.group('op')]
        return presence_test(dct.get(m.group('key')))

    raise ValueError('Invalid filter part %r' % filter_part)
1934
1935
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False """
    # Clauses are separated by '&' and all of them have to hold; stop at the
    # first clause that does not.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
1941
1942
def match_filter_func(filter_str):
    """Build a callable that checks an info dict against ``filter_str``.

    The returned function yields None when the info dict passes the filter
    and a human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by title, falling back to its id, then 'video'.
            fallback_name = info_dict.get('id', 'video')
            video_title = info_dict.get('title', fallback_name)
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
1951
1952
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP time expression into seconds (float).

    Accepts an offset such as ``12.5`` or ``12.5s`` and a clock value such
    as ``01:02:03.5``. An empty or missing expression yields 0.0; any other
    unrecognised expression yields None.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match is None:
        return None

    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds)
1964
1965
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time offset in seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The offset is rounded to whole milliseconds first and then split with
    integer arithmetic. Formatting the float parts directly truncates, so
    binary floating-point error could drop a millisecond (2.3 seconds used
    to render as ``00:00:02,299``).
    """
    total_millis = int(round(seconds * 1000))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
bf6427d2
YCH
1968
1969
def dfxp2srt(dfxp_data):
    """Convert a DFXP/TTML subtitle document (text) into SRT text.

    Paragraph elements are looked up in the ``ttml`` namespace, then the
    ``ttaf1`` namespace, then without a namespace. Each paragraph becomes
    one numbered SRT cue; when it has no usable ``end`` attribute the end
    time is computed from ``begin`` plus ``dur``.

    Raises ValueError when no paragraph element is found.
    """
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })

    def text_of(value):
        # Missing text is treated as the empty string.
        return str_or_none(value, default='')

    def parse_node(node):
        pieces = [text_of(node.text)]

        for child in node:
            tag = child.tag
            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                # A line break contributes a newline plus its tail text.
                pieces.append('\n' + text_of(child.tail))
            elif tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
                pieces.append(text_of(parse_node(child)))
            else:
                # Any other element is kept in serialized form.
                pieces.append(text_of(xml.etree.ElementTree.tostring(child)))

        return ''.join(pieces)

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    cues = []
    for index, para in enumerate(paras, 1):
        attributes = para.attrib
        begin_time = parse_dfxp_time_expr(attributes['begin'])
        end_time = parse_dfxp_time_expr(attributes.get('end'))
        if not end_time:
            end_time = begin_time + parse_dfxp_time_expr(attributes['dur'])
        cues.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(cues)
2010
2011
66e289ba
S
def cli_option(params, command_option, param):
    """Return ``[command_option, value]`` for ``params[param]``, or ``[]`` when it is unset (None)."""
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
2015
2016
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Translate a boolean entry of ``params`` into command line arguments.

    Returns ``[command_option, text]``, or the single item
    ``[command_option + separator + text]`` when ``separator`` is given,
    where ``text`` is ``true_value`` or ``false_value``.

    An unset option (missing key or None) produces ``[]`` instead of
    tripping the type assertion, in line with the other cli_* helpers.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    rendered = true_value if param else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
2023
2024
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` when ``params[param]`` equals ``expected_value``, else ``[]``."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
2028
2029
def cli_configuration_args(params, param, default=[]):
    """Return the list stored under ``params[param]``, or ``default`` when it is unset (None).

    Note that ``default`` itself is returned, not a copy of it.
    """
    configured = params.get(param)
    if configured is not None:
        assert isinstance(configured, list)
        return configured
    return default
2036
2037
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up, so a longer tag such
        # as 'en-US' resolves through its 'en' prefix. Unknown codes give None.
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the map; None when no entry has this value.
        candidates = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
2238
2239
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of full country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code.

        The lookup is case-insensitive; an unknown code gives None.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
2498
2499
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets an individual request pick its own proxy.

    A request may carry a 'Ytdl-request-proxy' header; its value replaces
    the proxy that would otherwise be used for that request, and the header
    is removed before the request is passed on.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        # Install http_open/https_open attributes that call proxy_open with
        # the '__noproxy__' placeholder. The loop variable and the bound
        # method are captured as lambda default arguments so each handler
        # keeps its own scheme.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        # NOTE(review): the base initializer presumably replaces these
        # defaults for schemes configured in `proxies` -- confirm against
        # the ProxyHandler implementation.
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Open `req` through `proxy`, honouring a per-request override.

        Returns None when the effective proxy is the '__noproxy__'
        placeholder; otherwise returns whatever the base class proxy_open
        returns.
        """
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            # Internal header: drop it once its value has been taken over.
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)