]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[mdr] Add audio test
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
912b38b4 7import calendar
676eb3f2 8import codecs
62e609ab 9import contextlib
e3946f98 10import ctypes
c496ca96
PH
11import datetime
12import email.utils
f45c185f 13import errno
be4a824d 14import functools
d77c3dfd 15import gzip
b7ab0590 16import itertools
03f9daab 17import io
f4bfd65f 18import json
d77c3dfd 19import locale
02dbf93f 20import math
347de493 21import operator
d77c3dfd 22import os
4eb7f1d1 23import pipes
c496ca96 24import platform
d77c3dfd 25import re
13ebea79 26import ssl
c496ca96 27import socket
b53466e1 28import struct
1c088fa8 29import subprocess
d77c3dfd 30import sys
181c8655 31import tempfile
01951dda 32import traceback
bcf89ce6 33import xml.etree.ElementTree
d77c3dfd 34import zlib
d77c3dfd 35
8c25f81b 36from .compat import (
8f9312c3 37 compat_basestring,
8c25f81b 38 compat_chr,
8c25f81b 39 compat_html_entities,
be4a824d 40 compat_http_client,
c86b6142 41 compat_kwargs,
8c25f81b 42 compat_parse_qs,
be4a824d 43 compat_socket_create_connection,
8c25f81b
PH
44 compat_str,
45 compat_urllib_error,
46 compat_urllib_parse,
47 compat_urllib_parse_urlparse,
48 compat_urllib_request,
49 compat_urlparse,
7d4111ed 50 shlex_quote,
8c25f81b 51)
4644ac55
S
52
53
468e2e92
FV
# The re module exposes no public name for the compiled-pattern type,
# so it is derived from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP headers; YoutubeDLHandler.http_request adds each of these to a
# request unless the request already carries that header.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Unique sentinel for "no default supplied", so that None stays usable as a
# real default value (see the xpath_* helpers).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
71
72
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that value names a codec that
    can actually encode text; falls back to 'UTF-8' when the lookup or the
    probe encoding fails for any reason.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
d77c3dfd 86
f4bfd65f 87
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first dumped into a temporary file created in the same
    directory as fn (named '<basename>.' + random part + '.tmp'), which is then
    renamed onto fn. If anything fails, the temporary file is removed (best
    effort) and the original exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        # Keep the file after close() so it can be renamed into place
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; cleanup is best effort
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
140
141
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath[@key] or, when val is
        given, xpath[@key='val']; returns None when nothing matches """
        # key and val are interpolated into the expression, so only a
        # conservative character set is accepted
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath that has attribute key
        (equal to val, when val is given); returns None otherwise """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        # Attribute predicates are applied by hand on this code path
        for candidate in node.findall(xpath):
            if key in candidate.attrib:
                if val is None or candidate.attrib.get(key) == val:
                    return candidate
        return None
163
d7e66d39
JMF
164# On python2.6 the xml.etree.ElementTree.Element methods don't support
165# the namespace parameter
5f6a1245
JW
166
167
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path into '{uri}tag' form.

    ns_map maps namespace prefixes to their URIs. Steps without a colon are
    kept unchanged. An unknown prefix raises KeyError.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
178
d77c3dfd 179
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    When nothing matches: return default if one was supplied, otherwise raise
    ExtractorError if fatal is set (the message uses name, or xpath when name
    is None), otherwise return None.
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None:
        return found
    # A supplied default takes precedence over fatal
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element %s' % label)
    return None
194
195
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element under node matching xpath.

    A missing element is handled by xpath_element (default, fatal or None).
    An element whose text is None is handled the same way: return default if
    supplied, raise ExtractorError if fatal, otherwise return None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Either nothing was found or xpath_element handed back the default
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % label)
    return None
a41fb80c
S
209
210
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: return default if supplied, raise
    ExtractorError if fatal, otherwise return None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
bf0ff932
PH
222
223
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an ID lookup is an attribute lookup on "id"
    return get_element_by_attribute(attribute='id', value=id, html=html)
227
12ea2f30 228
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document

    The first tag carrying attribute=value (quoted or unquoted) is located
    with a regular expression; its inner content is returned with HTML
    entities unescaped. Returns None when no such tag is found.
    """
    # Verbose pattern: opening tag name, any attributes before and after the
    # wanted one, then the lazily matched content up to the same closing tag
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if found is None:
        return None

    content = found.group('content')
    # Content that starts with a quote has its first and last character dropped
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 250
9e6dd238
FV
251
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Source newlines carry no meaning; only <br> and </p><p> produce newlines
    text = html.replace('\n', ' ')
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
267
268
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    '-' means standard output (its binary buffer when available). Otherwise
    the file is opened directly; if that fails with anything but EACCES, the
    name is passed through sanitize_path() and, if that changed it, opened
    again. Like open(), the final failure is raised to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
299
300
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 308
5f6a1245 309
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible

    The result is never empty: '_' is returned when nothing is left.
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        # Collapse every run of underscores into a single one
        result = re.sub(r'_+', '_', result).strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
d77c3dfd 346
5f6a1245 347
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged. On Windows the path is
    normalized, the drive/UNC prefix is kept as is, and in each remaining
    component (other than '.' and '..') the characters / < > : " | \\ ? *
    as well as a trailing dot are replaced with '#'.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # The prefix was stripped, so the first component is the empty
        # string left of the leading separator
        norm_path.pop(0)
    # Raw string: same pattern as before, without relying on the invalid
    # '\.' escape being passed through by a normal string literal
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
364
365
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list keeping the first occurrence of each element in input
    order. Elements are compared with ==, so they need not be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 373
912b38b4 374
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities are looked up in
    compat_html_entities.name2codepoint; '#123' and '#x7b' are decoded as
    decimal and hexadecimal code points. Anything else, including numeric
    entities outside the valid code point range, is returned literally as
    '&entity;'.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the '0x' prefix when base is 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Code point out of range (e.g. '#x110000'): keep it literal
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
393
394
def unescapeHTML(s):
    """Replace every '&...;' entity in s with its character.

    None is passed through. Entities that cannot be decoded are left as they
    are (see _htmlentity_transform).
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def expand(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', expand, s)
d77c3dfd 402
8bf48f23 403
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding used for file names and subprocess arguments.

    On Windows (major version >= 5) this is preferredencoding(); elsewhere it
    is sys.getfilesystemencoding(). 'utf-8' is used when that yields None.
    """
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
414
415
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged where Unicode file names can be used directly
    (Python 3; or Windows 2000 and up unless for_subprocess is set).
    Otherwise returns s encoded with get_subprocess_encoding(), silently
    dropping characters that cannot be encoded.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API.
    # On Python 2, pass the text directly to use Unicode APIs on Windows 2000
    # and up (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_api = sys.version_info >= (3, 0) or (
        not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
    if unicode_api:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
434
435
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename.

    On Python 3, or when b is not a byte string, b is returned unchanged.
    Otherwise it is decoded with get_subprocess_encoding(), ignoring bytes
    that cannot be decoded. for_subprocess is accepted but not used.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 445
f07b74fc
PH
446
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True).

    Byte strings are still accepted for legacy callers and are decoded as
    ASCII first.
    """
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
454
455
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument; inverse of encodeArgument."""
    return decodeFilename(b, for_subprocess=True)
458
459
8271226a
PH
def decodeOption(optval):
    """Return an option value as text.

    None is passed through; byte strings are decoded with
    preferredencoding(). Any other non-text value fails the assertion.
    """
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 468
5f6a1245 469
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: hours appear from 3600 seconds on
    and minutes from 60 seconds on, so exactly one hour reads '1:00:00'
    rather than '60:00' and exactly one minute reads '1:00'.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
477
a0ddb8a2 478
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are forwarded to the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            # check_hostname has to be switched off before verify_mode
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 502
732ea2f0 503
08f2a92c
JMF
def bug_reports_message():
    """Return the standard 'please report this issue' suffix for error
    messages, with update instructions that depend on ytdl_is_updateable()."""
    update_cmd = (
        'type youtube-dl -U to update' if ytdl_is_updateable()
        else 'see https://yt-dl.org/update on how to update')
    parts = (
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    )
    return ''.join(parts)
513
514
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.

        The final message is msg prefixed with 'video_id: ' (if given),
        followed by the cause (if given) and, for unexpected errors, by
        bug_reports_message().
        """

        # Errors raised while handling a network failure or an unavailable
        # video are always treated as expected
        expected_origins = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_origins:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 542
1c256f70 543
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected error raised for a URL that is not supported; the URL is
    kept in the url attribute."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
549
550
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
554
555
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so the original failure can still be inspected later
        self.exc_info = exc_info
d77c3dfd
FV
568
569
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
577
578
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # Initialise the base class too, so that str(exc) and exc.args carry
        # the message instead of being empty
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 588
5f6a1245 589
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
593
594
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available for
    that video.
    """
d77c3dfd
FV
602
603
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Give the exception a readable message; previously str(exc) was empty
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 616
5f6a1245 617
c5a59d93 618def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
619 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
620 # expected HTTP responses to meet HTTP/1.0 or later (see also
621 # https://github.com/rg3/youtube-dl/issues/6727)
622 if sys.version_info < (3, 0):
5a1a2e94 623 kwargs[b'strict'] = True
be4a824d
PH
624 hc = http_class(*args, **kwargs)
625 source_address = ydl_handler._params.get('source_address')
626 if source_address is not None:
627 sa = (source_address, 0)
628 if hasattr(hc, 'source_address'): # Python 2.7+
629 hc.source_address = sa
630 else: # Python 2.6
631 def _hc_connect(self, *args, **kwargs):
632 sock = compat_socket_create_connection(
633 (self.host, self.port), self.timeout, sa)
634 if is_https:
d7932313
PH
635 self.sock = ssl.wrap_socket(
636 sock, self.key_file, self.cert_file,
637 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
638 else:
639 self.sock = sock
640 hc.connect = functools.partial(_hc_connect, hc)
641
642 return hc
643
644
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """params is the options dict; _create_http_connection reads its
        'source_address' entry. Remaining arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req through a connection built by _create_http_connection."""
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data: raw stream first, zlib-wrapped second."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying code, whether or not the
        addinfourl class accepts the code as a constructor argument."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Pre-process an outgoing request: percent-encode its URL, add the
        missing standard headers and honour "Youtubedl-No-Compression"."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
            new_req = req_type(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            new_req.timeout = req.timeout
            req = new_req

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        # The marker header is internal: drop it together with Accept-encoding
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response: transparently decompress gzip/deflate
        bodies and percent-encode the Location header of redirects."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; the first
                # attempt that decompresses wins
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the original failure
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 768
5de90176 769
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler whose connections are created by
    _create_http_connection, using https_conn_class (HTTPSConnection from
    compat_http_client when none is given)."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward only the options the base handler actually stored:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        extra = {}
        for option in ('context', 'check_hostname'):
            attribute = '_' + option
            if hasattr(self, attribute):
                extra[option] = getattr(self, attribute)

        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **extra)
be4a824d
PH
785
786
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that currently behaves exactly like
    HTTPCookieProcessor; it is the place where the (disabled) Set-Cookie
    escaping workaround below lives."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE(review): the workaround itself is commented out, so this method
        # only delegates to the base class - confirm whether that is intended.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS uses the same processing as HTTP
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
809
810
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str has the form YYYY-MM-DD<delimiter>HH:MM:SS, optionally with
    fractional seconds (ignored) and a trailing 'Z' or +HH:MM / -HHMM offset.
    timezone, if given, is a datetime.timedelta used instead of parsing the
    offset from the string. Returns None for None or unparsable input.
    """

    if date_str is None:
        return None

    # Fractional seconds are dropped
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone = datetime.timedelta()
        tz_match = re.search(
            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if tz_match:
            # Cut the timezone designator off the end of the string
            date_str = date_str[:-len(tz_match.group(0))]
            # 'Z' has no sign group and means UTC
            if tz_match.group('sign'):
                direction = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(tz_match.group('hours')),
                    minutes=direction * int(tz_match.group('minutes')))
    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc.timetuple())
    except ValueError:
        return None
912b38b4
PH
840
841
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    A fixed list of date formats is tried, followed by RFC 2822 parsing.
    day_first selects how ambiguous numeric dates are read (DD/MM/YYYY when
    true, MM/DD/YYYY otherwise). Returns None when date_str is None or
    cannot be interpreted as a valid date.
    """

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    # (a plain D-M-YYYY date is left alone so its year is not taken for an offset)
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        format_expressions.extend([
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ])
    else:
        format_expressions.extend([
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ])
    # Every format is tried; if several match, the last one wins
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                # parsedate_tz may hand back impossible dates (e.g. 31 Feb);
                # treat them as unparsable instead of crashing
                pass
    return upload_date
905
5f6a1245 906
def determine_ext(url, default_ext='unknown_video'):
    """ Guess the file extension from a URL.

    The query string is dropped and whatever follows the last dot is
    returned if it is purely alphanumeric; otherwise (or if url is None)
    default_ext is returned. """
    if url is None:
        return default_ext
    without_query, _, _ = url.partition('?')
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 915
5f6a1245 916
def subtitles_filename(filename, sub_lang, sub_format):
    """ Replace the last extension of filename with '<sub_lang>.<sub_format>' """
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 919
5f6a1245 920
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'yesterday' or (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError (from strptime) if the string has none of these forms."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    match = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta has no months/years: approximate them as 30 and 365 days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
948
949
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other string is returned unchanged"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
958
5f6a1245 959
bd558525
JMF
class DateRange(object):
    """Represents an inclusive time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in a format accepted by
        date_from_str; None leaves that side of the range unbounded"""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.end < self.start:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(start=day, end=day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
989
990
def platform_name():
    """ Returns the platform name (platform.platform()) as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
999
1000
b58ddb32
PH
def _windows_write_string(s, out):
    """ Try to write s to the Windows console behind out using WriteConsoleW.

    Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # No fileno attribute: the output stream is virtual.
        # UnsupportedOperation: some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    wintypes = ctypes.wintypes
    kernel32 = ctypes.windll.kernel32

    GetStdHandle = ctypes.WINFUNCTYPE(wintypes.HANDLE, wintypes.DWORD)(
        (b"GetStdHandle", kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE, wintypes.LPWSTR,
        wintypes.DWORD, ctypes.POINTER(wintypes.DWORD),
        wintypes.LPVOID)((b"WriteConsoleW", kernel32))
    written = wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(wintypes.DWORD, wintypes.DWORD)(
        (b"GetFileType", kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE,
        ctypes.POINTER(wintypes.DWORD))((b"GetConsoleMode", kernel32))
    INVALID_HANDLE_VALUE = wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
            return True
        return GetConsoleMode(handle, ctypes.byref(wintypes.DWORD())) == 0

    if not_a_console(h):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text) if none
        for pos, char in enumerate(text):
            if ord(char) > 0xffff:
                return pos
        return len(text)

    while s:
        # Write at most 1024 characters, stopping before a non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which takes
        # two UTF-16 code units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if count:
            assert written.value > 0
            s = s[written.value:]
        else:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
    return True
1074
1075
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it.

    Binary streams (and every stream on Python 2) receive s encoded with
    encoding or the preferred encoding; text streams exposing a .buffer
    receive bytes through that buffer; anything else gets s unchanged.
    Characters that cannot be encoded are dropped. """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        target_encoding = (
            encoding or getattr(out, 'encoding', None) or preferredencoding())
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
1096
1097
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string to a list of integer byte values """
    if not bs:
        return []
    # Indexing yields ints on Python 3 and one-character strings on Python 2
    return list(bs) if isinstance(bs[0], int) else [ord(c) for c in bs]
1105
c257baff 1106
def intlist_to_bytes(xs):
    """ Pack a list of integer byte values into a byte string """
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
1111
1112
c1c9a79c
PH
# Cross-platform file locking: _lock_file(f, exclusive) / _unlock_file(f)
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count passed to LockFileEx/UnlockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the open file f; raises OSError on failure """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can reuse it
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            msvcrt.get_osfhandle(f.fileno()), flags, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release a lock taken with _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        unlocked = UnlockFileEx(
            msvcrt.get_osfhandle(f.fileno()), 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ flock() the open file f, exclusively or shared """
        if exclusive:
            operation = fcntl.LOCK_EX
        else:
            operation = fcntl.LOCK_SH
        fcntl.flock(f, operation)

    def _unlock_file(f):
        """ Release a lock taken with _lock_file """
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1176
1177
class locked_file(object):
    """ Context manager around a text file that is locked while in use.

    The file is opened on construction; the lock (shared for mode 'r',
    exclusive for 'a' and 'w') is taken on entering the with block, and on
    leaving it the lock is released and the file closed. """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the handle if the lock cannot be acquired
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1207
1208
4644ac55
S
def get_filesystem_encoding():
    """ Return sys.getfilesystemencoding(), or 'utf-8' if that is None """
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
1212
1213
def shell_quote(args):
    """ Return args as a single shell-escaped, space-separated command line.

    Byte-string arguments are decoded with the filesystem encoding first. """
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote (already imported from .compat and used by args_to_str)
        # replaces pipes.quote: the pipes module is deprecated and no longer
        # exists in Python 3.13+
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1223
1224
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded and appended to url as a '#__youtubedl_smuggle=...'
    fragment. """

    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return '#'.join((url, compat_urllib_parse.urlencode(payload)))
9d4660ca
PH
1231
1232
def unsmuggle_url(smug_url, default=None):
    """ Split a URL produced by smuggle_url into (url, data).

    URLs without smuggled data are returned as (smug_url, default). """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
1240
1241
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count as a string with a binary unit, e.g. '1.50KiB'.

    bytes may be a number, a numeric str, or None (which gives 'N/A'). """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the unit table: very small values would otherwise index
        # from the end of the list (negative exponent) and very large ones
        # would raise IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 1254
1c088fa8 1255
be64b5b0
PH
def parse_filesize(s):
    """ Parse a size such as '1.5 MiB' or '3,2GB' into an integer byte count.

    Returns None if s is None or does not start with a number followed by
    a known unit. """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and inofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1}
    for power, prefix in enumerate('KMGTPEZY', 1):
        unit_table[prefix + 'iB'] = 1024 ** power
        unit_table[prefix + 'B'] = 1000 ** power
        unit_table[prefix.lower() + 'B'] = 1024 ** power
        unit_table[prefix + 'b'] = 1000 ** power

    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    parsed = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if parsed is None:
        return None

    # A comma is accepted as the decimal separator
    number = float(parsed.group('num').replace(',', '.'))
    return int(number * unit_table[parsed.group('unit')])
be64b5b0
PH
1308
1309
caefb1de
PH
def month_by_name(name):
    """ Return the number (1-12) of a month by (locale-independently)
    English name, or None if the name is unknown """

    if name not in ENGLISH_MONTH_NAMES:
        return None
    return ENGLISH_MONTH_NAMES.index(name) + 1
1317
1318
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month by (locale-independently) English
    three-letter abbreviation, or None if the abbreviation is unknown """

    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
18258362
JMF
1327
1328
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity or character
    reference by '&amp;' in XML"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
1335
1336
def setproctitle(title):
    """ Best-effort: set the process name via prctl() from libc.so.6.

    Does nothing if libc.so.6 cannot be loaded or has no prctl. """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte longer than
    # the title, so the string handed to prctl is NUL-terminated.  (A buffer
    # of exactly len(title_bytes) has no room for the terminator.)
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME in <linux/prctl.h>
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1350
1351
def remove_start(s, start):
    """ Return s without the prefix start (s unchanged if it has no such prefix) """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1356
1357
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end (s unchanged if it has no such suffix) """
    # An empty suffix must be special-cased: s[:-0] is s[:0], i.e. ''
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1362
1363
def url_basename(url):
    """ Return the last non-empty component of the path of url """
    url_path = compat_urlparse.urlparse(url).path
    components = url_path.strip('/').split('/')
    return components[-1]
aa94a6d3
PH
1367
1368
class HEADRequest(compat_urllib_request.Request):
    """ A urllib Request that is sent with the HEAD method """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1372
1373
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, or return default if that is not possible.

    If get_attr is given, the attribute of that name is read from v first.
    The result is int(v) * invscale // scale.  None, the empty string and
    values int() cannot convert all yield default. """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        # TypeError: v is of a type int() does not accept (e.g. a list)
        return default
9732d77e 1386
9572013d 1387
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted to compat_str, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
1390
9732d77e
PH
1391
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus signs
    are removed before conversion.  None is passed through. """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
1398
1399
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, or return default if that is not possible.

    The result is float(v) * invscale / scale.  None and values float()
    cannot convert yield default. """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # TypeError: v is of a type float() does not accept (e.g. a list)
        return default
43f775e4
PH
1407
1408
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '3 min', '2h 5m' or 'PT4M11S'
    into a number of seconds.

    Returns None if s is not a string or has no recognised format. """
    if not isinstance(s, compat_basestring):
        return None

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s.strip())
    if not m:
        return None

    # A lone (possibly fractional) amount of minutes or hours
    for group, seconds_per_unit in (('only_mins', 60), ('only_hours', 60 * 60)):
        if m.group(group):
            return float_or_none(m.group(group), invscale=seconds_per_unit)

    # Otherwise add up whichever integer components were present
    total = 0
    for group, seconds_per_unit in (
            ('secs', 1),
            ('mins_reversed', 60),
            ('mins', 60),
            ('hours', 60 * 60),
            ('hours_reversed', 60 * 60),
            ('days', 24 * 60 * 60)):
        value = m.group(group)
        if value:
            total += int(value) * seconds_per_unit
    if m.group('ms'):
        # 'ms' holds the fractional part including its leading dot
        total += float(m.group('ms'))
    return total
91d7d0b3
JMF
1453
1454
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext before the real extension of filename.

    If expected_real_ext is given and differs from the real extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
1461
1462
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the real extension of filename with ext.

    If expected_real_ext is given and differs from the real extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = name
    return '{0}.{1}'.format(base, ext)
1468
1469
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary can be run, and returns its name
    (False if starting it raises OSError).
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1478
1479
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    The executable is run with args and its combined stdout/stderr is
    handed to detect_exe_version. """
    command = [encodeArgument(exe)] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1493
1494
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version string from the output of an executable.

    Returns the first group of version_re (by default whatever follows the
    word "version"), or unrecognized if the pattern does not match. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1504
1505
class PagedList(object):
    """ Base class for paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1510
9c44d242
PH
1511
class OnDemandPagedList(PagedList):
    """ Paged list whose pages are fetched lazily.

    pagefunc(pagenum) must return an iterable with the entries of the
    given zero-based page; every page except the last is expected to hold
    pagesize entries. """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list """
        size = self._pagesize
        collected = []
        for pagenum in itertools.count(start // size):
            first_index = pagenum * size
            next_first_index = first_index + size
            if start >= next_first_index:
                continue

            entries = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry within this page
            if first_index <= start < next_first_index:
                skip = start % size
            else:
                skip = 0

            # Offset just past the last wanted entry, if the slice ends here
            if end is not None and first_index <= end <= next_first_index:
                stop = ((end - 1) % size) + 1
            else:
                stop = None

            if skip != 0 or stop is not None:
                entries = entries[skip:stop]
            collected.extend(entries)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(entries) + skip < size:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_first_index:
                break
        return collected
81c2f20b
PH
1553
1554
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list whose number of pages is known in advance.

    pagefunc(pagenum) must return an iterable with the entries of the
    given zero-based page. """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list """
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the first fetched page
        to_skip = start % self._pagesize

        collected = []
        for pagenum in range(first_page, stop_page):
            entries = list(self._pagefunc(pagenum))
            if to_skip:
                entries = entries[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(entries) >= remaining:
                    # This page completes the slice
                    collected.extend(entries[:remaining])
                    break
                remaining -= len(entries)
            collected.extend(entries)
        return collected
1582
1583
def uppercase_escape(s):
    r""" Replace every \UXXXXXXXX escape sequence in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def unescape(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', unescape, s)
0fe2ff78
YCH
1590
1591
def lowercase_escape(s):
    r""" Replace every \uXXXX escape sequence in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def unescape(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', unescape, s)
b53466e1 1598
d05cfe06
S
1599
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986

    Reserved and already percent-encoded characters are left untouched."""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # On Python 2 the unicode string is UTF-8 encoded before quoting
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1605
1606
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Only path, params, query and fragment are escaped; scheme and network
    location are kept as they are."""
    parts = compat_urllib_parse_urlparse(url)
    escaped = dict(
        (field, escape_rfc3986(getattr(parts, field)))
        for field in ('path', 'params', 'query', 'fragment'))
    return parts._replace(**escaped).geturl()
1616
# struct_pack / struct_unpack: struct.pack / struct.unpack that also accept
# a text format string on interpreters where struct insists on bytes
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        spec_bytes = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(spec_bytes, *args)

    def struct_unpack(spec, *args):
        spec_bytes = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(spec_bytes, *args)
else:
    struct_pack, struct_unpack = struct.pack, struct.unpack
62e609ab
PH
1633
1634
def read_batch_urls(batch_fd):
    """ Read the URLs from a batch file object, one per line.

    batch_fd is closed afterwards.  Lines are stripped; empty lines and
    lines starting with '#', ';' or ']' are skipped. """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 byte order mark shows up as '\ufeff' when the line was
        # decoded as UTF-8 and as '\xef\xbb\xbf' when its three bytes were
        # decoded one by one (e.g. as latin-1); strip either form
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1649
1650
def urlencode_postdata(*args, **kargs):
    """ urlencode the arguments and return the result as ASCII bytes,
    ready to be used as POST data """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1653
1654
16392824
S
def encode_dict(d, encoding='utf-8'):
    """ Return a copy of d with all keys and values encoded to bytes """
    def encode(text):
        return text.encode(encoding)

    return dict((encode(key), encode(value)) for key, value in d.items())
1657
1658
0990305d
PH
# etree_iter(node): iterate over the elements of an ElementTree subtree
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1663
1664
bcf89ce6
PH
def parse_xml(s):
    """ Parse the XML document in the text s and return its root element.

    Doctype declarations are ignored. """
    etree = xml.etree.ElementTree

    class TreeBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = etree.XMLParser(target=TreeBuilder())
    # The parser keyword is only passed on Python 2.7 and later
    kwargs = {}
    if sys.version_info >= (2, 7):
        kwargs['parser'] = parser
    tree = etree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return tree
e68301af
PH
1680
1681
a1a530b0
PH
# US rating labels and the age limit each is treated as
US_RATINGS = dict([
    ('G', 0),
    ('PG', 10),
    ('PG-13', 13),
    ('R', 16),
    ('NC', 18),
])
fac55558
PH
1689
1690
146c80e2
S
def parse_age_limit(s):
    """ Parse an age limit such as '18', '16+' or a US rating label.

    Returns the limit as an int, or None if s is None or not recognised. """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1696
1697
def strip_jsonp(code):
    """ Strip the JSONP wrapper 'callback( ... );' (and trailing //-comments)
    from code, leaving only the payload.  Other input is returned unchanged. """
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_wrapper, r'\1', code)
478c2c61
PH
1701
1702
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal to JSON.

    String literals and bare identifiers are rewritten as double-quoted
    JSON strings (true/false/null are kept), and trailing commas before
    ']' or '}' are removed. """
    # How escapes/quotes inside a single-quoted literal are rewritten
    single_quoted_replacements = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def requote_inner(match):
        return single_quoted_replacements[match.group(0)]

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith('"'):
            # \' is not a valid escape in JSON
            token = re.sub(r"\\'", "'", token[1:-1])
        elif token.startswith("'"):
            token = re.sub(r"\\\\|\\'|\"", requote_inner, token[1:-1])
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop trailing commas
    return re.sub(r',(\s*[\]}])', lambda m: m.group(1), converted)
1726
1727
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.

    Returns a function mapping a quality id to its index in quality_ids,
    or -1 for unknown ids. """
    def q(qid):
        return quality_ids.index(qid) if qid in quality_ids else -1
    return q
1736
acd69589
PH
1737
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1739
a020a0dc
PH
1740
def limit_length(s, length):
    """ Add ellipses to overly long strings: anything longer than length is
    cut so that, with '...' appended, it is length characters long """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    keep = length - len(ELLIPSES)
    return s[:keep] + ELLIPSES
48844745
PH
1749
1750
def version_tuple(v):
    """ Split a version string on '.' and '-' into a tuple of ints """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
1753
1754
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    If version is empty or either version cannot be parsed, the answer is
    "not outdated" when assume_new is set and "outdated" otherwise. """
    when_unknown = not assume_new
    if not version:
        return when_unknown
    try:
        current, minimum = version_tuple(version), version_tuple(limit)
    except ValueError:
        return when_unknown
    return current < minimum
732ea2f0
PH
1762
1763
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U, i.e. whether this
    module was loaded by a zipimporter or the interpreter is frozen """
    from zipimport import zipimporter

    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
1769
1770
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
2ccd1b10
PH
1774
1775
c460bdd5
PH
def mimetype2ext(mt):
    """ Guess a file extension from a MIME type.

    The subtype (the part after the last '/') is translated through a small
    table of special cases and returned as is otherwise.
    Returns None if mt is None (e.g. a missing Content-Type header). """
    if mt is None:
        return None

    _, _, res = mt.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
        'ttml+xml': 'ttml',
    }.get(res, res)
1784
1785
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension of an opened URL from its response headers.

    The filename of a Content-Disposition attachment wins if it has a
    usable extension; otherwise the Content-Type is used. """
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(name):
            return url_handle.headers[name]

    disposition = getheader('Content-Disposition')
    if disposition:
        attachment = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1802
1803
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Return a base64 'data:' URI for the bytes data with the given MIME type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
1806
1807
05900629
PH
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked, i.e. both limits
    are set and age_limit is below content_limit """

    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1816
1817
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to their byte order mark (UTF-8 if
    there is none); the result is truthy if the text starts with '<',
    optionally preceded by whitespace. """

    # The UTF-32 marks must come before the UTF-16 ones they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, payload = 'utf-8', first_bytes
    for bom, bom_encoding in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = bom_encoding, first_bytes[len(bom):]
            break
    text = payload.decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
1836
1837
def determine_protocol(info_dict):
    """ Return the protocol of a format: its explicit 'protocol' entry if
    set, else a guess from the start or the extension of its 'url', else
    the URL scheme """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1858
1859
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values.

    Every column except the last is left-aligned and padded to the width
    of its longest cell plus one. """
    rows = [header_row] + data
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    padded_cells = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_cells) + '%s'
    rendered = [row_format % tuple(row) for row in rows]
    return '\n'.join(rendered)
347de493
PH
1866
1867
def _match_one(filter_part, dct):
    """ Evaluate a single filter expression against dct.

    filter_part is either a comparison ('key OP value', with OP one of
    < <= > >= = != and an optional '?' after OP that lets a missing key
    pass) or a presence test ('key' / '!key').
    Raises ValueError for anything else. """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op_symbol = m.group('op')
        compare = COMPARISON_OPERATORS[op_symbol]
        strval, intval = m.group('strval'), m.group('intval')
        if strval is not None:
            if op_symbol not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % op_symbol)
            comparison_value = strval
        else:
            try:
                comparison_value = int(intval)
            except ValueError:
                # Not a plain integer: try it as a file size, first as
                # written and then with a 'B' appended
                comparison_value = parse_filesize(intval)
                if comparison_value is None:
                    comparison_value = parse_filesize(intval + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            intval, filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # Missing values only pass when the operator was followed by '?'
            return m.group('none_inclusive')
        return compare(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        test = UNARY_OPERATORS[m.group('op')]
        return test(dct.get(m.group('key')))

    raise ValueError('Invalid filter part %r' % filter_part)
1925
1926
def match_str(filter_str, dct):
    """ Test dct against every '&'-separated part of filter_str. Returns True only if all parts pass, False otherwise """

    for part in filter_str.split('&'):
        # Stop at the first failing part; later parts are not evaluated
        if not _match_one(part, dct):
            return False
    return True
1932
1933
def match_filter_func(filter_str):
    """ Build a callback that returns None for info dicts passing filter_str and a skip message for the others """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by its title, falling back to its id, then to a generic word
            fallback_name = info_dict.get('id', 'video')
            video_title = info_dict.get('title', fallback_name)
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
1942
1943
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """
    Convert a DFXP time expression to seconds.

    Accepts a plain offset such as "12.5" or "12.5s" and a clock value of the
    form H:MM:SS[.fraction]. An empty or missing expression yields 0.0; any
    other unrecognised expression yields None.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match is None:
        return None

    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds)
1955
1956
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """
    Format a duration in seconds as an SRT timecode "HH:MM:SS,mmm".

    seconds may be an int or a float. The value is rounded to the nearest
    millisecond before being split into fields.
    """
    # Round once to whole milliseconds and split with integer arithmetic.
    # Truncating each field separately from the float value can lose a
    # millisecond to representation error (e.g. 5.84 came out as ",839").
    total_msecs = int(round(seconds * 1000))
    total_secs, msecs = divmod(total_msecs, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
bf6427d2
YCH
1959
1960
def dfxp2srt(dfxp_data):
    """
    Convert a DFXP/TTML subtitle document (text) into SRT text.

    Paragraph elements are looked up in the TTML namespace, then the TTAF1
    namespace, then without a namespace. Each paragraph becomes one numbered
    SRT cue; its end time is taken from the "end" attribute or, when that is
    absent or zero, computed from "begin" plus "dur".

    Raises ValueError if the document contains no paragraph elements.
    A paragraph lacking "begin", or lacking both "end" and "dur", raises
    KeyError from the attribute lookup.
    """
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })

    # Tag names are loop-invariant, so resolve them once
    br_tags = (_x('ttml:br'), _x('ttaf1:br'), 'br')
    span_tags = (_x('ttml:span'), _x('ttaf1:span'), 'span')

    def parse_node(node):
        # Flatten a paragraph/span into plain text with "\n" for line breaks
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)

        for child in node:
            if child.tag in br_tags:
                out += '\n' + str_or_empty(child.tail)
            elif child.tag in span_tags:
                # Text that follows the closing tag of the span is stored in
                # its tail; include it so it is not dropped from the cue
                out += parse_node(child) + str_or_empty(child.tail)
            else:
                # Unknown elements are kept as serialized markup (which
                # already includes their tail). tostring() returns bytes on
                # Python 3, and its default us-ascii output decodes safely.
                out += xml.etree.ElementTree.tostring(child).decode('ascii')

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # SRT cue numbers start at 1
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib['begin'])
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        if not end_time:
            end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
2001
2002
66e289ba
S
def cli_option(params, command_option, param):
    """ Return [command_option, value] for params[param], or [] when it is missing or None """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
2006
2007
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """
    Render the boolean params[param] as command line arguments.

    Without a separator the result is [command_option, text]; with one it is
    the single argument command_option + separator + text, where text is
    true_value or false_value. The parameter must be present and a bool.
    """
    flag = params.get(param)
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
2014
2015
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] when params[param] equals expected_value, otherwise [] """
    if params.get(param) == expected_value:
        return [command_option]
    return []
2019
2020
def cli_configuration_args(params, param, default=[]):
    """
    Return the list of extra command line arguments stored in params[param].

    When the parameter is missing or None, default is returned instead. A
    list default is returned as a fresh copy, so a caller that extends the
    result cannot alter the shared default for later calls.
    The stored value must be a list (checked with an assertion).
    """
    ex_args = params.get(param)
    if ex_args is None:
        # The default list object in the signature is shared by every call;
        # never hand that same object out
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
2027
2028
39672624
YCH
class ISO639Utils(object):
    """ Lookups between two-letter and three-letter language codes """

    # Two-letter code -> three-letter code
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Map an ISO 639-1 code to ISO 639-2/T; only the first two characters of code are used"""
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Map an ISO 639-2/T code back to ISO 639-1; returns None for unknown codes"""
        candidates = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
2229
2230
4eb10f66
YCH
class ISO3166Utils(object):
    """ Lookup of country names by two-letter country code """

    # Two-letter country code -> English country name
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code (case-insensitive), or None if unknown"""
        normalized = code.upper()
        return cls._country_map.get(normalized)
2489
2490
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """
    Proxy handler that lets a single request choose its own proxy through
    the 'Ytdl-request-proxy' request header.
    """

    def __init__(self, proxies=None):
        # Install http_open/https_open up front so proxy_open gets a chance
        # to inspect every request; '__noproxy__' marks "no proxy configured".
        # The base initializer runs afterwards with the supplied proxies.
        for scheme in ('http', 'https'):
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request header, when present, overrides the handler's proxy;
        # it is removed from the request headers once it has been read
        header_name = 'Ytdl-request-proxy'
        override = req.headers.get(header_name)
        if override is not None:
            del req.headers[header_name]
            proxy = override

        if proxy != '__noproxy__':
            return compat_urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)
        return None  # No Proxy